|
- # Copyright 2022 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # =======================================================================================
- """ image transform related """
- import random
- import math
-
- import cv2
- import numpy as np
-
-
def random_perspective(
        img,
        targets=(),
        degrees=10,
        translate=0.1,
        scale=0.1,
        shear=10,
        perspective=0.0,
        border=(0, 0),
):
    """Apply a random rotation/scale/shear/translation to an image and its boxes.

    Args:
        img: image array; ``img.shape[0]`` / ``img.shape[1]`` are used as
            height / width.
        targets: 2-D array whose first four columns are read as box corners
            (x1, y1, x2, y2). An empty sequence skips the label transform.
        degrees: rotation angle is drawn uniformly from ``[-degrees, degrees]``.
        translate: translation is drawn from ``0.5 +/- translate`` times the
            output width / height.
        scale: either a ``(low, high)`` pair the scale factor is drawn from,
            or a single number ``g`` meaning the range ``(1 - g, 1 + g)``.
            The scalar form previously raised ``TypeError`` because the value
            was indexed unconditionally.
        shear: x and y shear angles are drawn from ``[-shear, shear]`` degrees.
        perspective: when truthy, ``cv2.warpPerspective`` is used and box
            corners are divided by their homogeneous coordinate; otherwise
            ``cv2.warpAffine`` is used. No perspective terms are added to the
            matrix here.
        border: (y, x) border added on each side of the output canvas.

    Returns:
        tuple: ``(img, targets)`` - the warped image and the surviving targets
        with their first four columns replaced by the transformed boxes.
    """
    height = img.shape[0] + border[0] * 2
    width = img.shape[1] + border[1] * 2

    # Move the image centre to the origin so rotation/scale act about it.
    center = np.eye(3)
    center[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    center[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Accept both a scalar gain and an explicit (low, high) range.
    if np.isscalar(scale):
        scale_low, scale_high = 1 - scale, 1 + scale
    else:
        scale_low, scale_high = scale[0], scale[1]

    # Rotation and scale (random draws kept in the original order).
    rotation = np.eye(3)
    angle = random.uniform(-degrees, degrees)
    s = random.uniform(scale_low, scale_high)
    rotation[:2] = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=s)

    # Shear
    shear_mat = np.eye(3)
    shear_mat[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)
    shear_mat[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)

    # Translation back into the output canvas.
    translation = np.eye(3)
    translation[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width
    translation[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height

    # Combined matrix; the order of operations (right to left) matters.
    M = translation @ shear_mat @ rotation @ center

    image_changed = (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any()
    if image_changed:
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # Transform label coordinates
    n = len(targets)
    if n:
        # Four corners per box: (x1, y1), (x2, y2), (x1, y2), (x2, y1).
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
        xy = xy @ M.T
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)
        else:
            xy = xy[:, :2].reshape(n, 8)

        # Axis-aligned box enclosing the four transformed corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Clip boxes to the output canvas.
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # Keep only boxes that are still usable after the warp.
        keep = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[keep]
        targets[:, :4] = xy[keep]

    return img, targets
-
-
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
    """Select boxes that remain usable after augmentation.

    Args:
        box1: boxes before augmentation, indexed as rows (x1, y1, x2, y2).
        box2: boxes after augmentation, same layout as ``box1``.
        wh_thr: width and height of ``box2`` must both exceed this (pixels).
        ar_thr: aspect ratio of ``box2`` must stay below this.
        area_thr: area of ``box2`` over area of ``box1`` must exceed this.

    Returns:
        Boolean mask with one entry per box.
    """
    eps = 1e-16  # guards the divisions against zero-sized boxes

    before_w = box1[2] - box1[0]
    before_h = box1[3] - box1[1]
    after_w = box2[2] - box2[0]
    after_h = box2[3] - box2[1]

    aspect_ratio = np.maximum(after_w / (after_h + eps), after_h / (after_w + eps))

    large_enough = (after_w > wh_thr) & (after_h > wh_thr)
    area_kept = after_w * after_h / (before_w * before_h + eps) > area_thr
    shape_ok = aspect_ratio < ar_thr
    return large_enough & area_kept & shape_ok
-
-
def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
    """Randomly jitter hue, saturation and value of a BGR image in place.

    Each channel gets a random gain of ``1 +/- gain``. The result is written
    back into ``img`` through ``dst=img``; nothing is returned.
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    pixel_dtype = img.dtype
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

    # One lookup table per channel; hue wraps modulo 180, the others saturate.
    levels = np.arange(0, 256, dtype=np.int16)
    hue_table = ((levels * gains[0]) % 180).astype(pixel_dtype)
    sat_table = np.clip(levels * gains[1], 0, 255).astype(pixel_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(pixel_dtype)

    jittered = (
        cv2.LUT(hue, hue_table),
        cv2.LUT(sat, sat_table),
        cv2.LUT(val, val_table),
    )
    img_hsv = cv2.merge(jittered).astype(pixel_dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
-
-
def _mirror(image, boxes, prob=0.5):
    """Flip the image left-right with probability ``prob``.

    When the flip happens, the x columns of ``boxes`` (0 and 2) are mirrored
    in place around the image width. Returns ``(image, boxes)``.
    """
    _rows, width, _channels = image.shape
    if random.random() >= prob:
        return image, boxes

    flipped = image[:, ::-1]
    # New x1 = width - old x2, new x2 = width - old x1.
    boxes[:, 0::2] = width - boxes[:, 2::-2]
    return flipped, boxes
-
-
def preproc(img, input_size, swap=(2, 0, 1)):
    """Resize an image keeping aspect ratio, pad it with 114 and reorder axes.

    Args:
        img: source image array.
        input_size: (rows, cols) of the padded canvas.
        swap: axis order applied to the padded image before returning.

    Returns:
        tuple: the padded image as a contiguous float32 array and the resize
        ratio ``r``.
    """
    if len(img.shape) == 3:
        canvas_shape = (input_size[0], input_size[1], 3)
    else:
        canvas_shape = input_size
    padded_img = np.ones(canvas_shape, dtype=np.uint8) * 114

    # Largest ratio at which the image still fits inside the canvas.
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_rows = int(img.shape[0] * r)
    new_cols = int(img.shape[1] * r)

    resized_img = cv2.resize(
        img, (new_cols, new_rows), interpolation=cv2.INTER_LINEAR
    ).astype(np.uint8)
    # The resized image goes in the top-left corner; the rest stays at 114.
    padded_img[:new_rows, :new_cols] = resized_img

    reordered = padded_img.transpose(swap)
    return np.ascontiguousarray(reordered, dtype=np.float32), r
-
-
class TrainTransform:
    """Image and label transform used for training.

    Applies HSV jitter and a horizontal flip, letterboxes the image with
    ``preproc``, converts boxes to (cx, cy, w, h), pads the labels to
    ``max_labels`` rows and precomputes per-anchor in-box / in-centre masks.
    """

    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0, config=None):
        """Set up the transform.

        Args:
            max_labels: number of label rows in the padded output.
            flip_prob: probability of a horizontal flip.
            hsv_prob: probability of HSV jitter.
            config: optional object providing ``max_gt``, ``flip_prob``,
                ``hsv_prob``, ``fpn_strides`` and ``input_size``; when given
                it takes precedence over the other arguments.
        """
        if config:
            self.max_labels = config.max_gt
            self.flip_prob = config.flip_prob
            self.hsv_prob = config.hsv_prob
            self.strides = config.fpn_strides
            self.input_size = config.input_size
        else:
            # Honour the constructor arguments; they used to be ignored in
            # favour of hard-coded 1.0 / 0.5.
            self.hsv_prob = hsv_prob
            self.flip_prob = flip_prob
            self.max_labels = max_labels
            self.strides = [8, 16, 32]
            self.input_size = (640, 640)
        # Number of grid cells at each stride, and their total.
        self.grid_size = [
            (self.input_size[0] / x) * (self.input_size[1] / x) for x in self.strides
        ]
        self.num_total_anchor = int(sum(self.grid_size))

    def __call__(self, image, targets, input_dim):
        """Transform one training sample.

        Args:
            image: image array; may be colour-jittered in place.
            targets: 2-D array whose columns 0-3 are read as box corners
                (x1, y1, x2, y2) and column 4 as the label. The box columns
                are modified in place.
            input_dim: output size forwarded to ``preproc``.

        Returns:
            tuple: ``(image, padded_labels, is_in_boxes_all,
            is_in_boxes_and_center)``. ``padded_labels`` has ``max_labels``
            rows of (label, cx, cy, w, h); both masks have shape
            ``(max_labels, num_total_anchor)``.
        """
        boxes = targets[:, :4]
        labels = targets[:, 4]
        if not boxes.size:
            # No ground truth: return all-zero labels and all-False masks.
            targets = np.zeros((self.max_labels, 5), dtype=np.float32)
            image, _ = preproc(image, input_dim)
            mask_shape = (self.max_labels, self.num_total_anchor)
            is_in_boxes_all = np.zeros(mask_shape).astype(np.bool_)
            is_in_boxes_and_center = np.zeros(mask_shape).astype(np.bool_)
            return image, targets, is_in_boxes_all, is_in_boxes_and_center

        # Keep untouched copies as a fallback for when augmentation leaves
        # no valid box.
        image_o = image.copy()
        targets_o = targets.copy()
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        boxes_o = xyxy2cxcywh(boxes_o)

        if random.random() < self.hsv_prob:
            augment_hsv(image)
        image_t, boxes = _mirror(image, boxes, self.flip_prob)
        image_t, r_ = preproc(image_t, input_dim)
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Drop boxes whose shorter side is at most one pixel after resizing.
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]

        if not boxes_t.size:
            image_t, r_o = preproc(image_o, input_dim)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o

        labels_t = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((labels_t, boxes_t))
        true_labels = len(targets_t)

        # Copy at most max_labels rows; the remaining rows stay zero.
        padded_labels = np.zeros((self.max_labels, 5))
        num_kept = min(true_labels, self.max_labels)
        padded_labels[:num_kept] = targets_t[:num_kept]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)

        gt_bboxes_per_image = padded_labels[:, 1:5]
        is_in_boxes_all, is_in_boxes_and_center = self.get_in_boxes_info(
            gt_bboxes_per_image, true_labels
        )
        # Outer product: True where the row has any hit AND the column has any hit.
        is_in_boxes_all = (
            is_in_boxes_all.any(1).reshape((-1, 1)) * is_in_boxes_all.any(0).reshape((1, -1))
        )
        return image_t, padded_labels, is_in_boxes_all, is_in_boxes_and_center

    def get_grid(self):
        """Build the anchor grid for every stride.

        Returns:
            tuple: ``(x_shifts, y_shifts, expanded_strides)``, each of shape
            ``(1, total_anchors)``, holding the grid column index, grid row
            index and stride of every anchor.
        """
        x_shifts = []
        y_shifts = []
        expanded_strides = []
        for stride in self.strides:
            grid_rows = int(self.input_size[0] / stride)
            grid_cols = int(self.input_size[1] / stride)
            xv, yv = np.meshgrid(np.arange(0, grid_cols), np.arange(0, grid_rows))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            x_shifts.append(grid[:, :, 0])
            y_shifts.append(grid[:, :, 1])
            this_stride = np.zeros((1, grid.shape[1]))
            this_stride.fill(stride)
            expanded_strides.append(this_stride.astype(np.float32))
        x_shifts = np.concatenate(x_shifts, axis=1)
        y_shifts = np.concatenate(y_shifts, axis=1)
        expanded_strides = np.concatenate(expanded_strides, axis=1)
        return x_shifts, y_shifts, expanded_strides

    def get_in_boxes_info(self, gt_bboxes_per_image, true_lables):
        """Compute which anchor centres fall inside each box / centre region.

        Args:
            gt_bboxes_per_image: array of (cx, cy, w, h) rows, one per
                (padded) ground-truth slot.
            true_lables: number of leading rows that are real ground truth;
                rows from this index on are forced to False.

        Returns:
            tuple: ``(is_in_boxes_all, is_in_boxes_and_center)`` - the
            element-wise OR and AND of the in-box and in-centre masks, each
            of shape ``(num_rows, total_anchors)``.
        """
        x_shifts, y_shifts, expanded_strides = self.get_grid()
        strides = expanded_strides[0]

        # Anchor centres in input pixels, shape (1, total_anchors); they
        # broadcast against the (num_rows, 1) box columns below.
        x_centers = np.expand_dims(x_shifts[0] * strides + 0.5 * strides, axis=0)
        y_centers = np.expand_dims(y_shifts[0] * strides + 0.5 * strides, axis=0)

        cx = gt_bboxes_per_image[:, 0:1]
        cy = gt_bboxes_per_image[:, 1:2]
        half_w = 0.5 * gt_bboxes_per_image[:, 2:3]
        half_h = 0.5 * gt_bboxes_per_image[:, 3:4]

        # Signed distances from each anchor centre to the four box edges;
        # all positive means the centre lies strictly inside the box.
        b_l = x_centers - (cx - half_w)
        b_r = (cx + half_w) - x_centers
        b_t = y_centers - (cy - half_h)
        b_b = (cy + half_h) - y_centers
        bbox_deltas = np.stack([b_l, b_t, b_r, b_b], 2)
        is_in_boxes = bbox_deltas.min(axis=-1) > 0.0
        is_in_boxes[true_lables:, ...] = False

        # Same test against a square of half-side center_radius * stride
        # around the box centre.
        center_radius = 2.5
        radius = center_radius * np.expand_dims(strides, 0)
        c_l = x_centers - (cx - radius)
        c_r = (cx + radius) - x_centers
        c_t = y_centers - (cy - radius)
        c_b = (cy + radius) - y_centers
        center_deltas = np.stack([c_l, c_r, c_t, c_b], 2)
        is_in_centers = center_deltas.min(axis=-1) > 0.0
        is_in_centers[true_lables:, ...] = False  # padding gts are set False

        is_in_boxes_all = is_in_boxes | is_in_centers
        is_in_boxes_and_center = is_in_boxes & is_in_centers
        return is_in_boxes_all, is_in_boxes_and_center
-
-
class ValTransform:
    """Image transform used for validation."""

    def __init__(self, swap=(2, 0, 1), legacy=False):
        """Store the axis order, the legacy flag and per-channel mean/std."""
        self.swap = swap
        self.legacy = legacy
        channel_mean = [0.485, 0.456, 0.406]
        channel_std = [0.229, 0.224, 0.225]
        # Shaped (3, 1, 1) so they broadcast over a channel-first image.
        self.mean = np.array(channel_mean).reshape((3, 1, 1))
        self.std = np.array(channel_std).reshape((3, 1, 1))

    def __call__(self, img, input_size):
        """Letterbox ``img`` and, in legacy mode, normalise it.

        Returns the processed image together with a ``(1, 5)`` array of zeros.
        """
        processed, _ratio = preproc(img, input_size, self.swap)
        placeholder = np.zeros((1, 5))
        if not self.legacy:
            return processed, placeholder

        # Reverse the first axis (presumably channel order, given the default
        # swap - confirm), scale to [0, 1], then standardise.
        scaled = processed[::-1, :, :].copy() / 255.0
        normalised = (scaled - self.mean) / self.std
        return normalised, placeholder
-
-
def xyxy2cxcywh(bboxes):
    """Convert (x1, y1, x2, y2) boxes to (cx, cy, w, h) in place.

    The input array is modified and also returned.
    """
    corner_cols = [0, 1]
    size_cols = [2, 3]
    # Columns 2/3 become width/height first, then columns 0/1 move to the centre.
    bboxes[:, size_cols] = bboxes[:, size_cols] - bboxes[:, corner_cols]
    bboxes[:, corner_cols] = bboxes[:, corner_cols] + bboxes[:, size_cols] * 0.5
    return bboxes
-
-
def xyxy2xywh(bboxes):
    """Convert (x1, y1, x2, y2) boxes to (x1, y1, w, h) in place.

    The input array is modified and also returned.
    """
    corner_cols = [0, 1]
    size_cols = [2, 3]
    bboxes[:, size_cols] = bboxes[:, size_cols] - bboxes[:, corner_cols]
    return bboxes
-
-
def statistic_normalize_img(img, statistic_norm):
    """Scale a channel-first image to [0, 1] and optionally standardise it.

    Args:
        img: array with axes ordered (channel, row, column).
        statistic_norm: when truthy, subtract the per-channel mean and divide
            by the per-channel std after scaling.

    Returns:
        float32 array with the same axis order as the input.
    """
    # Work channel-last so the 3-element mean/std broadcast over the last axis.
    channel_last = np.transpose(img, (1, 2, 0)) / 255.
    if statistic_norm:
        channel_mean = np.array([0.485, 0.456, 0.406])
        channel_std = np.array([0.229, 0.224, 0.225])
        channel_last = (channel_last - channel_mean) / channel_std
    channel_first = np.transpose(channel_last, (2, 0, 1))
    return channel_first.astype(np.float32)
|