|
- # Copyright 2022 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """Dataloader script."""
- import math
- import os
- import os.path as osp
- import random
- from collections import OrderedDict
- from pathlib import Path
-
- import cv2
- import numpy as np
-
- from src.utils import build_thresholds
- from src.utils import create_anchors_vec
- from src.utils import xyxy2xywh
-
-
class LoadImages:
    """
    Loader of images from a directory for inference.

    The loader can be iterated or indexed; both return the same pair of
    the preprocessed network input and the untouched source image.

    Args:
        path (str): Path to the directory, containing ``*.jpg`` images.
        anchor_scales: Anchors description, forwarded to ``create_anchors_vec``.
        img_size (list): Size of output image as (width, height).

    Returns:
        img (np.array): Processed image: letterboxed, RGB, channel-first,
            float32, divided by 255.
        img0 (np.array): Original image as returned by ``cv2.imread`` (BGR).

    Raises:
        NotADirectoryError: If ``path`` is not a directory.
        AssertionError: If the directory has no ``*.jpg`` files or an image
            can not be read.
    """
    def __init__(self, path, anchor_scales, img_size=(1088, 608)):
        path = Path(path)
        if not path.is_dir():
            raise NotADirectoryError(f'Expected a path to the directory with images, got "{path}"')

        self.files = sorted(path.glob('*.jpg'))
        self.nf = len(self.files)  # Number of img files.

        # `path` is a Path object, so the message must be formatted rather
        # than concatenated (str + Path raises TypeError).
        assert self.nf > 0, f'No images found in {path}'

        self.anchors, self.strides = create_anchors_vec(anchor_scales)
        self.width = img_size[0]
        self.height = img_size[1]
        self.count = 0

    def _load(self, idx):
        """Read the image number ``idx`` and prepare the (img, img0) pair."""
        # A plain str is used both for OpenCV and for the error message.
        img_path = str(self.files[idx])

        # Read image
        img0 = cv2.imread(img_path)  # BGR
        assert img0 is not None, 'Failed to load ' + img_path

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height, width=self.width)

        # BGR -> RGB, HWC -> CHW, then scale by 1 / 255
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        return img, img0

    def __iter__(self):
        # __next__ increments before reading, so start one step before 0.
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # `>=` keeps an exhausted iterator raising StopIteration on every
        # further call instead of failing with IndexError.
        if self.count >= self.nf:
            raise StopIteration
        return self._load(self.count)

    def __getitem__(self, idx):
        # Indices wrap around the number of files.
        return self._load(idx % self.nf)

    def __len__(self):
        return self.nf  # number of files
-
-
class LoadVideo:
    """
    Video loader for inference.

    Iterating over the loader reads the video frame by frame.

    Args:
        path (str): Path to video.
        anchor_scales: Anchors description, forwarded to ``create_anchors_vec``.
        img_size (tuple): Size of output images as (width, height).

    Returns:
        img (np.array): Processed frame: letterboxed, RGB, channel-first,
            float32, divided by 255.
        img0 (np.array): Frame resized to fit ``img_size`` with the aspect
            ratio kept (BGR).

    Raises:
        FileExistsError: If ``path`` is not an existing file.
        OSError: If the video can not be opened or reports an empty frame size.
    """
    def __init__(self, path, anchor_scales, img_size=(1088, 608)):
        if not os.path.isfile(path):
            # NOTE(review): FileNotFoundError would describe this better; the
            # type is kept so that existing `except FileExistsError` still works.
            raise FileExistsError(f'Expected a path to a video file, got "{path}"')

        self.cap = cv2.VideoCapture(path)
        self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
        self.vw = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.vh = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # A zero frame size would make get_size() divide by zero, so fail
        # here with a message that names the video.
        if not self.cap.isOpened() or self.vw <= 0 or self.vh <= 0:
            raise OSError(f'Failed to open video "{path}"')

        self.anchors, self.strides = create_anchors_vec(anchor_scales)

        self.width = img_size[0]
        self.height = img_size[1]
        self.count = 0

        self.w, self.h = self.get_size(self.vw, self.vh, self.width, self.height)
        print(f'Lenth of the video: {self.vn:d} frames')

    def get_size(self, vw, vh, dw, dh):
        """
        Fit a (vw, vh) frame into a (dw, dh) box keeping the aspect ratio.

        Returns:
            tuple: Integer (width, height) of the scaled frame (truncated).
        """
        wa, ha = float(dw) / vw, float(dh) / vh
        a = min(wa, ha)  # the tighter side defines the scale
        return int(vw * a), int(vh * a)

    def __iter__(self):
        # __next__ increments before reading, so start one step before 0.
        # The capture itself is not rewound here.
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # `>=` keeps an exhausted iterator raising StopIteration on every
        # further call.
        if self.count >= len(self):
            raise StopIteration
        # Read image
        _, img0 = self.cap.read()  # BGR
        assert img0 is not None, f'Failed to load frame {self.count:d}'
        img0 = cv2.resize(img0, (self.w, self.h))

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height, width=self.width)

        # BGR -> RGB, HWC -> CHW, then scale by 1 / 255
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        output = (img, img0)

        return output

    def __len__(self):
        return self.vn  # number of frames reported by the capture
-
-
class JointDataset:
    """
    Loader for all datasets.

    Several datasets are concatenated into one index space; identity ids of
    every dataset are shifted so that they do not overlap between datasets.

    Args:
        root (str): Absolute path to datasets.
        paths (dict): Maps a dataset name to a text file with relative
            image paths, one per line.
        img_size (list): Size of output image as (width, height).
        k_max (int): Forwarded to ``build_thresholds``.
        augment (bool): Augment images or not.
        transforms: Transform methods.
        config (class): Config with hyperparameters. Must provide
            ``anchor_scales``.

    Returns:
        imgs (np_array): Prepared image. Shape (C, H, W)
        tconf (s, m, b) (np_array): Mask with bg (0), gt (1) and ign (-1) indices. Shape (nA, nGh, nGw).
        tbox (s, m, b) (np_array): Targets delta bbox values. Shape (nA, nGh, nGw, 4).
        tid (s, m, b) (np_array): Grid with id for every cell. Shape (nA, nGh, nGw).
        emb_indices (s, m, b): Last element of every tuple returned by ``build_thresholds``.
    """
    # Graph mode in Mindspore uses constant shapes, so labels of every image
    # are padded with zero rows up to this number of rows.
    max_labels = 100

    def __init__(
            self,
            root,
            paths,
            img_size=(1088, 608),
            k_max=200,
            augment=False,
            transforms=None,
            config=None,
    ):
        self.img_files = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()
        self.config = config
        self.anchors, self.strides = create_anchors_vec(config.anchor_scales)
        self.k_max = k_max

        # Iterate for all of datasets to prepare paths to labels
        for ds, img_path in paths.items():
            with open(img_path, 'r') as file:
                relative_paths = [line.strip() for line in file]

            # Blank lines are dropped BEFORE joining with root: after the
            # join an empty entry turns into "<root>/" and is never empty.
            self.img_files[ds] = [osp.join(root, rel) for rel in relative_paths if rel]

            self.label_files[ds] = [
                x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[ds]
            ]

        # Search for max pedestrian id in dataset
        for ds, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if lb.shape[0] < 1:
                    continue
                # A file with a single row is loaded as a 1-D array.
                if lb.ndim < 2:
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            self.tid_num[ds] = max_index + 1

        # First identity id of every dataset in the joint id space.
        last_index = 0
        for k, v in self.tid_num.items():
            self.tid_start_index[k] = last_index
            last_index += v

        self.nid = int(last_index + 1)
        self.nds = [len(x) for x in self.img_files.values()]
        # Index of the first sample of every dataset in the joint index space.
        self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))]
        self.nf = sum(self.nds)
        self.width = img_size[0]
        self.height = img_size[1]
        self.augment = augment
        self.transforms = transforms

        print('=' * 40)
        print('dataset summary')
        print(self.tid_num)
        print('total # identities:', self.nid)
        print('start index')
        print(self.tid_start_index)
        print('=' * 40)

    def get_data(self, img_path, label_path):
        """
        Get and prepare data (augment img).

        Args:
            img_path (str): Path to the image.
            label_path (str): Path to the text file with labels, 6 values per row.

        Returns:
            img (np.array): Letterboxed RGB image (after ``transforms`` if set).
            labels (np.array): Labels, shape (N, 6). Columns 2..5 hold the
                box as center x, center y, width, height divided by the
                output width / height. Column 1 is used as identity id.
            img_path (str): The input image path.

        Raises:
            ValueError: If the image can not be read.
        """
        height = self.height
        width = self.width
        img = cv2.imread(img_path)  # BGR
        if img is None:
            raise ValueError(f'File corrupt {img_path}')

        if self.augment:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            s = img_hsv[:, :, 1].astype(np.float32)
            v = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * fraction + 1
            s *= a
            # Only a gain above 1 can push values over 255.
            if a > 1:
                np.clip(s, a_min=0, a_max=255, out=s)

            a = (random.random() * 2 - 1) * fraction + 1
            v *= a
            if a > 1:
                np.clip(v, a_min=0, a_max=255, out=v)

            img_hsv[:, :, 1] = s.astype(np.uint8)
            img_hsv[:, :, 2] = v.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=height, width=width)

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)

            # Normalized xywh to pixel xyxy format
            labels = labels0.copy()
            labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
            labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
            labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
            labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
        else:
            # Keep the (N, 6) layout even without labels, otherwise the
            # padding concatenation in __getitem__ fails on a 1-D array.
            labels = np.zeros((0, 6), dtype=np.float32)

        # Augment image and labels
        if self.augment:
            img, labels, _ = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))

        nlbls = len(labels)
        if nlbls > 0:
            # convert xyxy to xywh
            labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy())
            labels[:, 2] /= width
            labels[:, 3] /= height
            labels[:, 4] /= width
            labels[:, 5] /= height

        if self.augment:
            # random left-right flip
            if random.random() > 0.5:
                img = np.fliplr(img)
                if nlbls > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img = np.ascontiguousarray(img[:, :, ::-1])  # BGR to RGB
        if self.transforms is not None:
            img = self.transforms(img)

        return img, labels, img_path

    def __getitem__(self, files_index):
        """
        Iterator function for train dataset

        Args:
            files_index (int): Index of the sample in the joint index space.

        Returns:
            tuple: Image and targets, see the class docstring.

        Raises:
            ValueError: If the image has more than ``max_labels`` labels.
        """
        # self.cds is increasing, so the last dataset whose first index
        # does not exceed files_index is the one that owns the sample.
        for name, first_index in zip(self.label_files, self.cds):
            if files_index >= first_index:
                ds = name
                start_index = first_index

        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]

        imgs, labels, img_path = self.get_data(img_path, label_path)

        # Shift ids to the joint id space; rows with id -1 are left as is.
        has_id = labels[:, 1] > -1
        labels[has_id, 1] += self.tid_start_index[ds]

        # Graph mode in Mindspore uses constant shapes
        # Thus, it is necessary to fill targets to max possible ids in image
        to_fill = self.max_labels - labels.shape[0]
        if to_fill < 0:
            raise ValueError(
                f'{img_path} has {labels.shape[0]} labels, '
                f'but at most {self.max_labels} are supported'
            )
        padding = np.zeros((to_fill, 6), dtype=np.float32)
        labels = np.concatenate((labels, padding), axis=0)

        # Calculate confidence mask, bbox delta and ids for every map size
        small, medium, big = build_thresholds(
            labels=labels,
            anchor_vec_s=self.anchors[0],
            anchor_vec_m=self.anchors[1],
            anchor_vec_b=self.anchors[2],
            k_max=self.k_max,
        )

        tconf_s, tbox_s, tid_s, emb_indices_s = small
        tconf_m, tbox_m, tid_m, emb_indices_m = medium
        tconf_b, tbox_b, tid_b, emb_indices_b = big

        total_values = (
            imgs.astype(np.float32),
            tconf_s,
            tbox_s,
            tid_s,
            tconf_m,
            tbox_m,
            tid_m,
            tconf_b,
            tbox_b,
            tid_b,
            emb_indices_s,
            emb_indices_m,
            emb_indices_b,
        )
        return total_values

    def __len__(self):
        return self.nf  # total number of samples in all datasets
-
-
class JointDatasetDetection(JointDataset):
    """
    Joint dataset for evaluation.

    Unlike the parent class, a sample holds the padded labels themselves.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __getitem__(self, files_index):
        """
        Get one evaluation sample.

        Args:
            files_index (int): Index of the sample in the joint index space.

        Returns:
            tuple: float32 image, labels padded with zero rows to 100 rows,
            and the number of real label rows.
        """
        # The last dataset whose first index does not exceed files_index wins.
        dataset_names = list(self.label_files.keys())
        for position, first_index in enumerate(self.cds):
            if files_index < first_index:
                continue
            ds = dataset_names[position]
            start_index = first_index

        local_index = files_index - start_index
        imgs, labels, _ = self.get_data(
            self.img_files[ds][local_index],
            self.label_files[ds][local_index],
        )

        # Move ids into the joint id space, rows with id -1 stay untouched.
        id_offset = self.tid_start_index[ds]
        for row in labels:
            if row[1] > -1:
                row[1] += id_offset

        targets_size = labels.shape[0]

        # Graph mode in Mindspore uses constant shapes
        # Thus, it is necessary to fill targets to max possible ids in image.
        filler = np.zeros((100 - targets_size, 6), dtype=np.float32)
        labels = np.concatenate((labels, filler), axis=0)

        return imgs.astype(np.float32), labels, targets_size
-
-
def letterbox(
        img,
        height=608,
        width=1088,
        color=(127.5, 127.5, 127.5),
):
    """
    Fit an image into a (height, width) canvas keeping the aspect ratio
    and fill the remaining border with color.

    Args:
        img (np.array): Input image, first two axes are height and width.
        height (int): Height of the canvas.
        width (int): Width of the canvas.
        color (tuple): Value used for the border.

    Returns:
        img (np.array): Resized and padded image.
        ratio (float): Applied resize ratio.
        dw (float): Half of the total horizontal padding.
        dh (float): Half of the total vertical padding.
    """
    src_h, src_w = img.shape[:2]
    ratio = min(float(height) / src_h, float(width) / src_w)

    # Size of the resized image before the border is added
    resized_w = round(src_w * ratio)
    resized_h = round(src_h * ratio)

    dw = (width - resized_w) / 2  # width padding
    dh = (height - resized_h) / 2  # height padding

    # The -0.1 / +0.1 shift splits an odd padding between the two sides
    pad_top = round(dh - 0.1)
    pad_bottom = round(dh + 0.1)
    pad_left = round(dw - 0.1)
    pad_right = round(dw + 0.1)

    resized = cv2.resize(img, (resized_w, resized_h), interpolation=cv2.INTER_AREA)
    padded = cv2.copyMakeBorder(
        resized, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=color,
    )

    return padded, ratio, dw, dh
-
-
def random_affine(
        img,
        targets=None,
        degrees=(-10, 10),
        translate=(.1, .1),
        scale=(.9, 1.1),
        shear=(-2, 2),
        border_value=(127.5, 127.5, 127.5),
):
    """
    Apply a random rotation, scale, translation and shear to the image
    to reduce overfitting.

    The same transformation is also applied to targets bbox cords.

    Args:
        img (np.array): Input image, first two axes are height and width.
        targets (np.array): Optional labels, columns 2..5 are read as
            x1, y1, x2, y2 in pixels.
        degrees (tuple): Range of the rotation angle in degrees.
        translate (tuple): Max translation along x and y as a fraction of
            the image width and height.
        scale (tuple): Range of the scale factor.
        shear (tuple): Range of the shear angle in degrees.
        border_value (tuple): Value used for pixels outside of the source image.

    Returns:
        The warped image only, if ``targets`` is None. Otherwise a tuple of
        the warped image, the targets with warped boxes (rejected boxes are
        removed) and the 3x3 transformation matrix.
    """
    border = 0  # width of added border (optional)
    height = img.shape[0]
    width = img.shape[1]

    # Rotation and Scale
    r = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    zoom = random.random() * (scale[1] - scale[0]) + scale[0]
    r[:2] = cv2.getRotationMatrix2D(angle=a, center=(width / 2, height / 2), scale=zoom)

    # Translation. The x shift is a fraction of the width and the y shift is
    # a fraction of the height (the axes used to be swapped).
    t = np.eye(3)
    t[0, 2] = (random.random() * 2 - 1) * translate[0] * width + border  # x translation (pixels)
    t[1, 2] = (random.random() * 2 - 1) * translate[1] * height + border  # y translation (pixels)

    # Shear
    s = np.eye(3)
    s[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    s[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    m = s @ t @ r  # Combined rotation matrix. ORDER IS IMPORTANT HERE!
    imw = cv2.warpPerspective(img, m, dsize=(width, height), flags=cv2.INTER_LINEAR,
                              borderValue=border_value)  # BGR order borderValue

    # Without targets only the image is returned
    if targets is None:
        return imw

    # Return warped points also
    if targets.shape[0] > 0:
        n = targets.shape[0]
        points = targets[:, 2:6].copy()
        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

        # warp all four corners of every box
        xy = np.ones((n * 4, 3))
        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ m.T)[:, :2].reshape(n, 8)

        # create new boxes: axis-aligned box around the warped corners
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # apply angle-based reduction
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
        y = (xy[:, 3] + xy[:, 1]) / 2
        w = (xy[:, 2] - xy[:, 0]) * reduction
        h = (xy[:, 3] - xy[:, 1]) * reduction
        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # clip warped boxes to the image
        np.clip(xy[:, 0], 0, width, out=xy[:, 0])
        np.clip(xy[:, 2], 0, width, out=xy[:, 2])
        np.clip(xy[:, 1], 0, height, out=xy[:, 1])
        np.clip(xy[:, 3], 0, height, out=xy[:, 3])

        # keep boxes that are large enough, kept more than 10% of their
        # area and are not too elongated
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

        targets = targets[i]
        targets[:, 2:6] = xy[i]

    return imw, targets, m
|