|
- from random import sample, shuffle
-
- import cv2
- import numpy as np
- from PIL import Image
- from torch.utils.data.dataset import Dataset
-
- from utils.utils import cvtColor, preprocess_input
-
-
class NuscenesDataset(Dataset):
    """nuScenes detection dataset pairing a front-camera image with a
    pre-rendered radar image.

    Each annotation line holds a camera image path followed by
    whitespace-separated ground-truth boxes "x1,y1,x2,y2,class".  The radar
    image path is derived from the camera path by substituting
    "CAM_FRONT" -> "RADAR_IMAGE" and "jpg" -> "png".

    __getitem__ returns (cam_image, radar_image, boxes): both images as CHW
    float32 arrays and boxes as (cx, cy, w, h, class) in pixel units.
    """

    def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio=0.9):
        """
        annotation_lines: list of annotation strings (see class docstring).
        input_shape:      (h, w) network input size in pixels.
        num_classes:      number of detection classes (kept for callers).
        epoch_length:     number of epochs the step schedule is planned for.
        mosaic:           whether mosaic augmentation is enabled at all.
        train:            True enables random augmentation, False disables it.
        mosaic_ratio:     fraction of scheduled steps during which mosaic may fire.
        """
        super(NuscenesDataset, self).__init__()
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)

        self.input_shape = input_shape
        self.num_classes = num_classes
        self.epoch_length = epoch_length
        self.mosaic = mosaic
        self.train = train

        # Starts at -1 so the first __getitem__ call sees step 0.
        self.step_now = -1
        self.mosaic_ratio = mosaic_ratio

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length

        # NOTE(review): this counter is per-process; with DataLoader
        # num_workers > 0 each worker holds its own copy, so the mosaic
        # cut-off below is only approximate — confirm this is acceptable.
        self.step_now += 1
        # ---------------------------------------------------#
        # Random augmentation is applied during training only;
        # no random augmentation during validation.
        # ---------------------------------------------------#
        if self.mosaic:
            # Mosaic fires with probability 0.5 and only during the first
            # mosaic_ratio fraction of the planned training steps.
            if self.rand() < 0.5 and self.step_now < self.epoch_length * self.mosaic_ratio * self.length:
                lines = sample(self.annotation_lines, 3)
                lines.append(self.annotation_lines[index])
                shuffle(lines)
                cam_image, radar_image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
            else:
                cam_image, radar_image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random=self.train)
        else:
            cam_image, radar_image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random=self.train)
        # Camera image goes through preprocess_input; the radar image is only
        # scaled by 1/255 (it encodes radar returns, not natural-image colors).
        cam_image = np.transpose(preprocess_input(np.array(cam_image, dtype=np.float32)), (2, 0, 1))
        radar_image = np.transpose((np.array(radar_image, dtype=np.float32) / 255), (2, 0, 1))
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # Convert corner boxes (x1, y1, x2, y2) to center form (cx, cy, w, h).
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return cam_image, radar_image, box

    def rand(self, a=0, b=1):
        """Uniform random float in [a, b)."""
        return np.random.rand() * (b - a) + a

    def _apply_hsv_jitter(self, image, hue, sat, val):
        """Apply hue/saturation/value jitter to an RGB float image in [0, 1].

        OpenCV's float RGB->HSV conversion puts H in [0, 360] and S, V in
        [0, 1], so the hue shift is hue * 360 wrapped modulo 360.  (The
        previous inline code wrapped hue at 1, which applied a constant
        -1 degree shift to nearly every pixel and never wrapped negative
        hues back to 360.)  Returns an HWC float32 array scaled to [0, 255].
        """
        x = cv2.cvtColor(np.array(image, np.float32), cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        # Wrap hue into [0, 360].
        x[..., 0][x[..., 0] > 360] -= 360
        x[..., 0][x[..., 0] < 0] += 360
        x[..., 1] *= sat
        x[..., 2] *= val
        # Clamp saturation/value to [0, 1]; any negatives go to 0.
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        return cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
        """Load one camera/radar pair and (optionally) randomly augment it.

        Returns (cam_image_data, radar_image_data, box): HWC images sized to
        input_shape, with boxes still in corner (x1, y1, x2, y2, cls) form,
        clipped to the canvas and with degenerate boxes dropped.
        """
        line = annotation_line.split()
        # ------------------------------#
        # Read the camera image, forced to RGB.
        # ------------------------------#
        cam_image = Image.open(line[0])
        cam_image = cvtColor(cam_image)
        # ------------------------------#
        # Read the matching radar image, forced to RGB.
        # ------------------------------#
        radar_image_path = line[0].replace("CAM_FRONT", "RADAR_IMAGE").replace("jpg", "png")
        radar_image = Image.open(radar_image_path)
        radar_image = cvtColor(radar_image)

        # ------------------------------#
        # Source sizes and target size.
        # ------------------------------#
        iw, ih = cam_image.size
        rw, rh = radar_image.size
        h, w = input_shape
        # ------------------------------#
        # Ground-truth boxes: one "x1,y1,x2,y2,cls" token per box.
        # ------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            # ------------------------------#
            # Deterministic letterbox resize (validation path).
            # ------------------------------#
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2
            # ---------------------------------#
            # Pad the camera image with gray bars.
            # ---------------------------------#
            cam_image = cam_image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(cam_image, (dx, dy))
            cam_image_data = np.array(new_image, np.float32)
            # ------------------------------#
            # Letterbox the radar image with its own scale — its resolution
            # may differ from the camera's.
            # ------------------------------#
            scale = min(w / rw, h / rh)
            nw_r = int(rw * scale)
            nh_r = int(rh * scale)
            dx_r = (w - nw_r) // 2
            dy_r = (h - nh_r) // 2
            radar_image = radar_image.resize((nw_r, nh_r), Image.BICUBIC)
            new_image_r = Image.new('RGB', (w, h), (128, 128, 128))
            new_image_r.paste(radar_image, (dx_r, dy_r))
            radar_image_data = np.array(new_image_r, np.float32)
            # ---------------------------------#
            # Map boxes into the letterboxed camera frame and clip.
            # ---------------------------------#
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

            return cam_image_data, radar_image_data, box

        # ------------------------------------------#
        # Random scale and aspect-ratio jitter (training path).
        # ------------------------------------------#
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        # The radar image is resized with the camera geometry so the two
        # modalities stay spatially aligned.
        cam_image = cam_image.resize((nw, nh), Image.BICUBIC)
        radar_image = radar_image.resize((nw, nh), Image.BICUBIC)

        # ------------------------------------------#
        # Paste both images at the same random offset on a gray canvas.
        # ------------------------------------------#
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(cam_image, (dx, dy))
        cam_image = new_image

        new_image_r = Image.new('RGB', (w, h), (128, 128, 128))
        new_image_r.paste(radar_image, (dx, dy))
        radar_image = new_image_r
        # ------------------------------------------#
        # Random horizontal flip, applied to both modalities.
        # ------------------------------------------#
        flip = self.rand() < .5
        if flip:
            cam_image = cam_image.transpose(Image.FLIP_LEFT_RIGHT)
            radar_image = radar_image.transpose(Image.FLIP_LEFT_RIGHT)

        # ------------------------------------------#
        # HSV jitter on the camera image only; radar values are measurements,
        # not colors, so the radar image is left untouched.
        # ------------------------------------------#
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        cam_image_data = self._apply_hsv_jitter(np.array(cam_image, np.float32) / 255, hue, sat, val)
        radar_image_data = radar_image

        # ---------------------------------#
        # Map boxes through the same geometric transform and clip.
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]

        return cam_image_data, radar_image_data, box

    def merge_bboxes(self, bboxes, cutx, cuty):
        """Clip the boxes of the four mosaic tiles to their quadrant.

        bboxes[i] holds the boxes of tile i (0: top-left, 1: bottom-left,
        2: bottom-right, 3: top-right); (cutx, cuty) is the mosaic seam.
        Boxes fully outside their quadrant are dropped; boxes crossing the
        seam are truncated at it.  Returns a flat list of
        [x1, y1, x2, y2, cls] lists.
        """
        merge_bbox = []
        for i in range(len(bboxes)):
            for box in bboxes[i]:
                tmp_box = []
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

                if i == 0:
                    # Top-left tile: keep only what lies above/left of the seam.
                    if y1 > cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 1:
                    # Bottom-left tile.
                    if y2 < cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 2:
                    # Bottom-right tile.
                    if y2 < cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx

                if i == 3:
                    # Top-right tile.
                    if y1 > cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx
                tmp_box.append(x1)
                tmp_box.append(y1)
                tmp_box.append(x2)
                tmp_box.append(y2)
                tmp_box.append(box[-1])
                merge_bbox.append(tmp_box)
        return merge_bbox

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5):
        """Build one mosaic sample from four annotation lines.

        Four camera/radar pairs are individually flipped and resized, pasted
        into the four quadrants around a random seam, and stitched together;
        the stitched camera mosaic then gets HSV jitter.  max_boxes is kept
        for interface compatibility but is not used.
        Returns (cam_image_data, radar_image_data, new_boxes).
        """
        h, w = input_shape
        # Random seam position, kept away from the borders.
        min_offset_x = self.rand(0.25, 0.75)
        min_offset_y = self.rand(0.25, 0.75)

        # Independent random sizes for the four tiles.
        nws = [int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1)),
               int(w * self.rand(0.4, 1))]
        nhs = [int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1)),
               int(h * self.rand(0.4, 1))]

        # Paste offsets so tile i touches the seam from its own quadrant.
        place_x = [int(w * min_offset_x) - nws[0], int(w * min_offset_x) - nws[1], int(w * min_offset_x),
                   int(w * min_offset_x)]
        place_y = [int(h * min_offset_y) - nhs[0], int(h * min_offset_y), int(h * min_offset_y),
                   int(h * min_offset_y) - nhs[3]]

        cam_image_datas = []
        radar_image_datas = []
        box_datas = []
        index = 0
        for line in annotation_line:
            # Split one annotation line into path + box tokens.
            line_content = line.split()
            # Camera image, forced to RGB.
            cam_image = Image.open(line_content[0])
            cam_image = cvtColor(cam_image)
            # Matching radar image, forced to RGB.
            radar_image_path = line_content[0].replace("CAM_FRONT", "RADAR_IMAGE").replace("jpg", "png")
            radar_image = Image.open(radar_image_path)
            radar_image = cvtColor(radar_image)

            # Source size and ground-truth boxes.
            iw, ih = cam_image.size
            box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

            # Random horizontal flip.
            # NOTE(review): the flip is skipped when a tile has no boxes, so
            # box-free tiles are never mirrored — confirm this is intentional.
            flip = self.rand() < .5
            if flip and len(box) > 0:
                cam_image = cam_image.transpose(Image.FLIP_LEFT_RIGHT)
                radar_image = radar_image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            nw = nws[index]
            nh = nhs[index]
            cam_image = cam_image.resize((nw, nh), Image.BICUBIC)
            radar_image = radar_image.resize((nw, nh), Image.BICUBIC)

            # Paste the camera tile at its quadrant position.
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(cam_image, (dx, dy))
            cam_image_data = np.array(new_image)

            # Paste the radar tile at the same position.
            new_image_r = Image.new('RGB', (w, h), (128, 128, 128))
            new_image_r.paste(radar_image, (dx, dy))
            radar_image_data = np.array(new_image_r)

            index = index + 1
            box_data = []
            # Map boxes into the mosaic canvas and clip.
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            cam_image_datas.append(cam_image_data)
            radar_image_datas.append(radar_image_data)
            box_datas.append(box_data)

        # Stitch the four quadrants together along the seam.
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)
        ##------- camera mosaic ---------##
        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = cam_image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = cam_image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = cam_image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = cam_image_datas[3][:cuty, cutx:, :]
        ##------- radar mosaic ---------##
        new_image_r = np.zeros([h, w, 3])
        new_image_r[:cuty, :cutx, :] = radar_image_datas[0][:cuty, :cutx, :]
        new_image_r[cuty:, :cutx, :] = radar_image_datas[1][cuty:, :cutx, :]
        new_image_r[cuty:, cutx:, :] = radar_image_datas[2][cuty:, cutx:, :]
        new_image_r[:cuty, cutx:, :] = radar_image_datas[3][:cuty, cutx:, :]

        # HSV jitter on the stitched camera mosaic only; radar untouched.
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        cam_image_data = self._apply_hsv_jitter(np.array(new_image / 255, np.float32), hue, sat, val)
        radar_image_data = new_image_r

        # Clip the tile boxes to their quadrants.
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return cam_image_data, radar_image_data, new_boxes
-
-
- # DataLoader中collate_fn使用
# Used as collate_fn in the DataLoader.
def nuscenes_dataset_collate(batch):
    """Stack a list of (cam_image, radar_image, boxes) samples into a batch.

    Camera and radar images are stacked into single numpy arrays; the box
    lists are kept as a Python list because each sample may hold a
    different number of boxes.
    """
    cam_images, radar_images, bboxes = zip(*batch)
    return np.array(cam_images), np.array(radar_images), list(bboxes)
|