|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
-
- """Base dataset generator definition."""
- import random
- import cv2
- import numpy as np
-
- import mindspore.ops as P
- from mindspore import Tensor
- from mindspore.common import dtype
-
-
- class BaseDataset:
- """Base dataset generator class."""
- def __init__(self,
- ignore_label=-1,
- base_size=2048,
- crop_size=(512, 1024),
- downsample_rate=1,
- scale_factor=16,
- mean=None,
- std=None):
-
- self.base_size = base_size
- self.crop_size = crop_size
- self.ignore_label = ignore_label
-
- self.mean = mean
- self.std = std
- self.scale_factor = scale_factor
- self.downsample_rate = 1. / downsample_rate
-
- self.files = []
-
- def __len__(self):
- return len(self.files)
-
- def input_transform(self, image):
- """Transform data format of images."""
- image = image.astype(np.float32)[:, :, ::-1] # BGR2RGB
- image = image / 255.0
- image -= self.mean
- image /= self.std
- return image
-
- def label_transform(self, label):
- """Transform data format of labels."""
- return np.array(label).astype('int32')
-
- def pad_image(self, image, h, w, shape, padvalue):
- """Pad an image."""
- pad_image = image.copy()
- pad_h = max(shape[0] - h, 0)
- pad_w = max(shape[1] - w, 0)
- if pad_h > 0 or pad_w > 0:
- pad_image = cv2.copyMakeBorder(image, 0, pad_h, 0,
- pad_w, cv2.BORDER_CONSTANT,
- value=padvalue)
-
- return pad_image
-
- def rand_crop(self, image, label):
- """Crop a feature at a random location."""
- h, w, _ = image.shape
- image = self.pad_image(image, h, w, self.crop_size, (0.0, 0.0, 0.0))
- label = self.pad_image(label, h, w, self.crop_size, (self.ignore_label,))
-
- new_h, new_w = label.shape
- x = random.randint(0, new_w - self.crop_size[1])
- y = random.randint(0, new_h - self.crop_size[0])
- image = image[y:y + self.crop_size[0], x:x + self.crop_size[1]]
- label = label[y:y + self.crop_size[0], x:x + self.crop_size[1]]
-
- return image, label
-
- def multi_scale_aug(self, image, label=None, rand_scale=1, rand_crop=True):
- """Augment feature into different scales."""
- long_size = np.int(self.base_size * rand_scale + 0.5)
- h, w, _ = image.shape
- if h > w:
- new_h = long_size
- new_w = np.int(w * long_size / h + 0.5)
- else:
- new_w = long_size
- new_h = np.int(h * long_size / w + 0.5)
-
- image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
- # image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
- if label is not None:
- label = cv2.resize(label, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
- else:
- return image
-
- if rand_crop:
- image, label = self.rand_crop(image, label)
-
- return image, label
-
- def gen_sample(self, image, label, multi_scale=False, is_flip=False):
- """Data preprocessing."""
- if multi_scale:
- rand_scale = 0.5 + random.randint(0, self.scale_factor) / 10.0
- image, label = self.multi_scale_aug(image, label, rand_scale=rand_scale)
-
- image = self.input_transform(image) # HWC
- label = self.label_transform(label) # HW
-
- image = image.transpose((2, 0, 1)) # CHW
-
- if is_flip:
- flip = np.random.choice(2) * 2 - 1
- image = image[:, :, ::flip]
- label = label[:, ::flip]
-
- if self.downsample_rate != 1:
- label = cv2.resize(label, None,
- fx=self.downsample_rate,
- fy=self.downsample_rate,
- interpolation=cv2.INTER_NEAREST)
- # image CHW, label HW
- return image, label
-
- def inference(self, model, image, flip=False):
- """Inference using one feature."""
- shape = image.shape
- pred = model(image)
- pred = pred[-1] # image NCHW
- pred = P.ResizeBilinear((shape[-2], shape[-1]))(pred)
- if flip:
- flip_img = image.asnumpy()[:, :, :, ::-1]
- flip_output = model(Tensor(flip_img.copy()))
- flip_output = P.ResizeBilinear((shape[-2], shape[-1]))(flip_output)
- flip_pred = flip_output.asnumpy()
- flip_pred = Tensor(flip_pred[:, :, :, ::-1])
- pred = P.Add()(pred, flip_pred)
- pred = Tensor(pred.asnumpy() * 0.5)
- return P.Exp()(pred)
-
- def multi_scale_inference(self, model, image, scales=None, flip=False):
- """Inference using multi-scale features."""
- batch, _, ori_height, ori_width = image.shape
- assert batch == 1, "only supporting batchsize 1."
- image = image.asnumpy()[0].transpose((1, 2, 0)).copy()
- stride_h = np.int(self.crop_size[0] * 2.0 / 3.0)
- stride_w = np.int(self.crop_size[1] * 2.0 / 3.0)
-
- final_pred = Tensor(np.zeros([1, self.num_classes, ori_height, ori_width]), dtype=dtype.float32)
- padvalue = -1.0 * np.array(self.mean) / np.array(self.std)
- for scale in scales:
- new_img = self.multi_scale_aug(image=image, rand_scale=scale, rand_crop=False)
- height, width = new_img.shape[:-1]
-
- if max(height, width) <= np.min(self.crop_size):
- new_img = self.pad_image(new_img, height, width,
- self.crop_size, padvalue)
- new_img = new_img.transpose((2, 0, 1))
- new_img = np.expand_dims(new_img, axis=0)
- new_img = Tensor(new_img)
- preds = self.inference(model, new_img, flip)
- preds = preds[:, :, 0:height, 0:width]
- else:
- if height < self.crop_size[0] or width < self.crop_size[1]:
- new_img = self.pad_image(new_img, height, width,
- self.crop_size, padvalue)
- new_h, new_w = new_img.shape[:-1]
- rows = np.int(np.ceil(1.0 * (new_h -
- self.crop_size[0]) / stride_h)) + 1
- cols = np.int(np.ceil(1.0 * (new_w -
- self.crop_size[1]) / stride_w)) + 1
- preds = Tensor(np.zeros([1, self.num_classes, new_h, new_w]), dtype=dtype.float32)
- count = Tensor(np.zeros([1, 1, new_h, new_w]), dtype=dtype.float32)
-
- for r in range(rows):
- for c in range(cols):
- h0 = r * stride_h
- w0 = c * stride_w
- h1 = min(h0 + self.crop_size[0], new_h)
- w1 = min(w0 + self.crop_size[1], new_w)
- crop_img = new_img[h0:h1, w0:w1, :]
- if h1 == new_h or w1 == new_w:
- crop_img = self.pad_image(crop_img,
- h1 - h0,
- w1 - w0,
- self.crop_size,
- padvalue)
- crop_img = crop_img.transpose((2, 0, 1))
- crop_img = np.expand_dims(crop_img, axis=0)
- crop_img = Tensor(crop_img)
- pred = self.inference(model, crop_img, flip)
- preds[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1 - h0, 0:w1 - w0]
- count[:, :, h0:h1, w0:w1] += 1
- preds = preds / count
- preds = preds[:, :, :height, :width]
- preds = P.ResizeBilinear((ori_height, ori_width))(preds)
- final_pred += preds
- final_pred = P.Add()(final_pred, preds)
- return final_pred
|