|
- from random import sample, shuffle
-
- import cv2
- import numpy as np
- from PIL import Image
- from torch.utils.data.dataset import Dataset
-
- from utils.utils import cvtColor, preprocess_input
-
-
class NuscenesDataset(Dataset):
    """nuScenes detection dataset pairing a front-camera image with a
    pre-rendered radar image.

    Each annotation line holds a camera image path followed by
    whitespace-separated ground-truth boxes "x1,y1,x2,y2,class".  The radar
    image path is derived from the camera path by substituting
    "CAM_FRONT" -> "RADAR_IMAGE" and "jpg" -> "png".

    __getitem__ returns (cam_image, radar_image, boxes): both images as CHW
    float32 arrays and boxes as (cx, cy, w, h, class) in pixel units.
    """

    def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio=0.9):
        """
        annotation_lines: list of annotation strings (see class docstring).
        input_shape:      (h, w) network input size in pixels.
        num_classes:      number of detection classes (kept for callers).
        epoch_length:     number of epochs the step schedule is planned for.
        mosaic:           whether mosaic augmentation is enabled at all.
        train:            True enables random augmentation, False disables it.
        mosaic_ratio:     fraction of scheduled steps during which mosaic may fire.
        """
        super(NuscenesDataset, self).__init__()
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)

        self.input_shape = input_shape
        self.num_classes = num_classes
        self.epoch_length = epoch_length
        self.mosaic = mosaic
        self.train = train

        # Starts at -1 so the first __getitem__ call sees step 0.
        self.step_now = -1
        self.mosaic_ratio = mosaic_ratio

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length

        # NOTE(review): this counter is per-process; with DataLoader
        # num_workers > 0 each worker holds its own copy, so the mosaic
        # cut-off below is only approximate — confirm this is acceptable.
        self.step_now += 1
        # ---------------------------------------------------#
        # Random augmentation is applied during training only;
        # no random augmentation during validation.
        # ---------------------------------------------------#
        if self.mosaic:
            # Mosaic fires with probability 0.5 and only during the first
            # mosaic_ratio fraction of the planned training steps.
            if self.rand() < 0.5 and self.step_now < self.epoch_length * self.mosaic_ratio * self.length:
                lines = sample(self.annotation_lines, 3)
                lines.append(self.annotation_lines[index])
                shuffle(lines)
                cam_image, radar_image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
            else:
                cam_image, radar_image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random=self.train)
        else:
            cam_image, radar_image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random=self.train)
        # Camera image goes through preprocess_input; the radar image is only
        # scaled by 1/255 (it encodes radar returns, not natural-image colors).
        cam_image = np.transpose(preprocess_input(np.array(cam_image, dtype=np.float32)), (2, 0, 1))
        radar_image = np.transpose((np.array(radar_image, dtype=np.float32) / 255), (2, 0, 1))
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # Convert corner boxes (x1, y1, x2, y2) to center form (cx, cy, w, h).
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return cam_image, radar_image, box

    def rand(self, a=0, b=1):
        """Uniform random float in [a, b)."""
        return np.random.rand() * (b - a) + a

    def _apply_hsv_jitter(self, image, hue, sat, val):
        """Apply hue/saturation/value jitter to an RGB float image in [0, 1].

        OpenCV's float RGB->HSV conversion puts H in [0, 360] and S, V in
        [0, 1], so the hue shift is hue * 360 wrapped modulo 360.  (The
        previous inline code wrapped hue at 1, which applied a constant
        -1 degree shift to nearly every pixel and never wrapped negative
        hues back to 360.)  Returns an HWC float32 array scaled to [0, 255].
        """
        x = cv2.cvtColor(np.array(image, np.float32), cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        # Wrap hue into [0, 360].
        x[..., 0][x[..., 0] > 360] -= 360
        x[..., 0][x[..., 0] < 0] += 360
        x[..., 1] *= sat
        x[..., 2] *= val
        # Clamp saturation/value to [0, 1]; any negatives go to 0.
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        return cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
        """Load one camera/radar pair and (optionally) randomly augment it.

        Returns (cam_image_data, radar_image_data, box): HWC images sized to
        input_shape, with boxes still in corner (x1, y1, x2, y2, cls) form,
        clipped to the canvas and with degenerate boxes dropped.
        """
        line = annotation_line.split()
        # ------------------------------#
        # Read the camera image, forced to RGB.
        # ------------------------------#
        cam_image = Image.open(line[0])
        cam_image = cvtColor(cam_image)
        # ------------------------------#
        # Read the matching radar image, forced to RGB.
        # ------------------------------#
        radar_image_path = line[0].replace("CAM_FRONT", "RADAR_IMAGE").replace("jpg", "png")
        radar_image = Image.open(radar_image_path)
        radar_image = cvtColor(radar_image)

        # ------------------------------#
        # Source sizes and target size.
        # ------------------------------#
        iw, ih = cam_image.size
        rw, rh = radar_image.size
        h, w = input_shape
        # ------------------------------#
        # Ground-truth boxes: one "x1,y1,x2,y2,cls" token per box.
        # ------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            # ------------------------------#
            # Deterministic letterbox resize (validation path).
            # ------------------------------#
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2
            # ---------------------------------#
            # Pad the camera image with gray bars.
            # ---------------------------------#
            cam_image = cam_image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(cam_image, (dx, dy))
            cam_image_data = np.array(new_image, np.float32)
            # ------------------------------#
            # Letterbox the radar image with its own scale — its resolution
            # may differ from the camera's.
            # ------------------------------#
            scale = min(w / rw, h / rh)
            nw_r = int(rw * scale)
            nh_r = int(rh * scale)
            dx_r = (w - nw_r) // 2
            dy_r = (h - nh_r) // 2
            radar_image = radar_image.resize((nw_r, nh_r), Image.BICUBIC)
            new_image_r = Image.new('RGB', (w, h), (128, 128, 128))
            new_image_r.paste(radar_image, (dx_r, dy_r))
            radar_image_data = np.array(new_image_r, np.float32)
            # ---------------------------------#
            # Map boxes into the letterboxed camera frame and clip.
            # ---------------------------------#
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

            return cam_image_data, radar_image_data, box

        # ------------------------------------------#
        # Random scale and aspect-ratio jitter (training path).
        # ------------------------------------------#
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        # The radar image is resized with the camera geometry so the two
        # modalities stay spatially aligned.
        cam_image = cam_image.resize((nw, nh), Image.BICUBIC)
        radar_image = radar_image.resize((nw, nh), Image.BICUBIC)

        # ------------------------------------------#
        # Paste both images at the same random offset on a gray canvas.
        # ------------------------------------------#
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(cam_image, (dx, dy))
        cam_image = new_image

        new_image_r = Image.new('RGB', (w, h), (128, 128, 128))
        new_image_r.paste(radar_image, (dx, dy))
        radar_image = new_image_r
        # ------------------------------------------#
        # Random horizontal flip, applied to both modalities.
        # ------------------------------------------#
        flip = self.rand() < .5
        if flip:
            cam_image = cam_image.transpose(Image.FLIP_LEFT_RIGHT)
            radar_image = radar_image.transpose(Image.FLIP_LEFT_RIGHT)

        # ------------------------------------------#
        # HSV jitter on the camera image only; radar values are measurements,
        # not colors, so the radar image is left untouched.
        # ------------------------------------------#
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        cam_image_data = self._apply_hsv_jitter(np.array(cam_image, np.float32) / 255, hue, sat, val)
        radar_image_data = radar_image

        # ---------------------------------#
        # Map boxes through the same geometric transform and clip.
        # ---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]

        return cam_image_data, radar_image_data, box

    def merge_bboxes(self, bboxes, cutx, cuty):
        """Clip the boxes of the four mosaic tiles to their quadrant.

        bboxes[i] holds the boxes of tile i (0: top-left, 1: bottom-left,
        2: bottom-right, 3: top-right); (cutx, cuty) is the mosaic seam.
        Boxes fully outside their quadrant are dropped; boxes crossing the
        seam are truncated at it.  Returns a flat list of
        [x1, y1, x2, y2, cls] lists.
        """
        merge_bbox = []
        for i in range(len(bboxes)):
            for box in bboxes[i]:
                tmp_box = []
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

                if i == 0:
                    # Top-left tile: keep only what lies above/left of the seam.
                    if y1 > cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 1:
                    # Bottom-left tile.
                    if y2 < cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 2:
                    # Bottom-right tile.
                    if y2 < cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx

                if i == 3:
                    # Top-right tile.
                    if y1 > cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx
                tmp_box.append(x1)
                tmp_box.append(y1)
                tmp_box.append(x2)
                tmp_box.append(y2)
                tmp_box.append(box[-1])
                merge_bbox.append(tmp_box)
        return merge_bbox

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5):
        """Build one mosaic sample from four annotation lines.

        Four camera/radar pairs are individually flipped and resized, pasted
        into the four quadrants around a random seam, and stitched together;
        the stitched camera mosaic then gets HSV jitter.  max_boxes is kept
        for interface compatibility but is not used.
        Returns (cam_image_data, radar_image_data, new_boxes).
        """
        h, w = input_shape
        # Random seam position, kept away from the borders.
        min_offset_x = self.rand(0.25, 0.75)
        min_offset_y = self.rand(0.25, 0.75)

        # Independent random sizes for the four tiles.
        nws = [int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1)),
               int(w * self.rand(0.4, 1))]
        nhs = [int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1)),
               int(h * self.rand(0.4, 1))]

        # Paste offsets so tile i touches the seam from its own quadrant.
        place_x = [int(w * min_offset_x) - nws[0], int(w * min_offset_x) - nws[1], int(w * min_offset_x),
                   int(w * min_offset_x)]
        place_y = [int(h * min_offset_y) - nhs[0], int(h * min_offset_y), int(h * min_offset_y),
                   int(h * min_offset_y) - nhs[3]]

        cam_image_datas = []
        radar_image_datas = []
        box_datas = []
        index = 0
        for line in annotation_line:
            # Split one annotation line into path + box tokens.
            line_content = line.split()
            # Camera image, forced to RGB.
            cam_image = Image.open(line_content[0])
            cam_image = cvtColor(cam_image)
            # Matching radar image, forced to RGB.
            radar_image_path = line_content[0].replace("CAM_FRONT", "RADAR_IMAGE").replace("jpg", "png")
            radar_image = Image.open(radar_image_path)
            radar_image = cvtColor(radar_image)

            # Source size and ground-truth boxes.
            iw, ih = cam_image.size
            box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

            # Random horizontal flip.
            # NOTE(review): the flip is skipped when a tile has no boxes, so
            # box-free tiles are never mirrored — confirm this is intentional.
            flip = self.rand() < .5
            if flip and len(box) > 0:
                cam_image = cam_image.transpose(Image.FLIP_LEFT_RIGHT)
                radar_image = radar_image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            nw = nws[index]
            nh = nhs[index]
            cam_image = cam_image.resize((nw, nh), Image.BICUBIC)
            radar_image = radar_image.resize((nw, nh), Image.BICUBIC)

            # Paste the camera tile at its quadrant position.
            dx = place_x[index]
            dy = place_y[index]
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(cam_image, (dx, dy))
            cam_image_data = np.array(new_image)

            # Paste the radar tile at the same position.
            new_image_r = Image.new('RGB', (w, h), (128, 128, 128))
            new_image_r.paste(radar_image, (dx, dy))
            radar_image_data = np.array(new_image_r)

            index = index + 1
            box_data = []
            # Map boxes into the mosaic canvas and clip.
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            cam_image_datas.append(cam_image_data)
            radar_image_datas.append(radar_image_data)
            box_datas.append(box_data)

        # Stitch the four quadrants together along the seam.
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)
        ##------- camera mosaic ---------##
        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = cam_image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = cam_image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = cam_image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = cam_image_datas[3][:cuty, cutx:, :]
        ##------- radar mosaic ---------##
        new_image_r = np.zeros([h, w, 3])
        new_image_r[:cuty, :cutx, :] = radar_image_datas[0][:cuty, :cutx, :]
        new_image_r[cuty:, :cutx, :] = radar_image_datas[1][cuty:, :cutx, :]
        new_image_r[cuty:, cutx:, :] = radar_image_datas[2][cuty:, cutx:, :]
        new_image_r[:cuty, cutx:, :] = radar_image_datas[3][:cuty, cutx:, :]

        # HSV jitter on the stitched camera mosaic only; radar untouched.
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        cam_image_data = self._apply_hsv_jitter(np.array(new_image / 255, np.float32), hue, sat, val)
        radar_image_data = new_image_r

        # Clip the tile boxes to their quadrants.
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return cam_image_data, radar_image_data, new_boxes
-
-
- # DataLoader中collate_fn使用
# Used as collate_fn in the DataLoader.
def nuscenes_dataset_collate(batch):
    """Stack a list of (cam_image, radar_image, boxes) samples into a batch.

    Camera and radar images are stacked into single numpy arrays; the box
    lists are kept as a Python list because each sample may hold a
    different number of boxes.
    """
    cam_images, radar_images, bboxes = zip(*batch)
    return np.array(cam_images), np.array(radar_images), list(bboxes)
|