|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
-
- import os
-
- import cv2
- import numpy as np
- import mindspore as ms
- from mindspore import nn
- from mindspore import context
- from mindspore import ops
- from mindspore.train.serialization import load_checkpoint, load_param_into_net, save_checkpoint
- from src import prepare_args
- from src.data.dataset import coco_id_dict
- from src.DETR.util import box_cxcywh_to_xyxy
- from src.DETR.backbone import build_backbone
- from src.DETR.detr import build_transformer, DETR
-
-
def get_size_with_aspect_ratio(image_size, size, max_size=None):
    """Compute an aspect-preserving output size whose short side is ``size``.

    Args:
        image_size: ``(height, width)`` of the source image.
        size: Desired length of the shorter side after resizing.
        max_size: Optional upper bound for the longer side. When scaling the
            short side to ``size`` would push the long side past this bound,
            ``size`` is reduced first.

    Returns:
        A ``(height, width)`` tuple. The input dimensions are returned as-is
        when the shorter side already equals the (possibly reduced) target.
    """
    height, width = image_size

    if max_size is not None:
        short_side = float(min((width, height)))
        long_side = float(max((width, height)))
        # Would the long side overshoot max_size? If so, shrink the target.
        if long_side / short_side * size > max_size:
            size = int(round(max_size * short_side / long_side))

    # Nothing to do when the shorter side is already at the target length.
    width_already_fits = width <= height and width == size
    height_already_fits = height <= width and height == size
    if width_already_fits or height_already_fits:
        return (height, width)

    if width < height:
        # Portrait: width is the short side; height is truncated, not rounded.
        return (int(size * height / width), size)

    # Landscape or square: height is the short side.
    return (size, int(size * width / height))
-
-
class SOTAResize:
    """Resize an image so its short side is ``min_size``, capped by ``max_size``.

    The target size is computed by ``get_size_with_aspect_ratio``.
    """

    def __init__(self, min_size, max_size):
        self.min_size, self.max_size = min_size, max_size

    def __call__(self, img):
        """Resize ``img`` and build an all-False mask of the resized shape.

        Args:
            img: Array whose first two dimensions are (height, width).

        Returns:
            ``(resized_image, mask, (new_height, new_width))``.
        """
        src_h, src_w = img.shape[:2]
        size = get_size_with_aspect_ratio(
            (src_h, src_w), self.min_size, self.max_size
        )
        nh, nw = size
        # cv2.resize takes the destination size as (width, height).
        resize_img = cv2.resize(img, (nw, nh))
        print(f'resize size: {nh},{nw}')
        mask = np.zeros(size, dtype=np.bool_)
        return resize_img, mask, size
-
-
class SOTAPad:
    """Zero-pad a channel-first image to a square of side ``tgt_size``."""

    def __init__(self, tgt_size):
        self.tgt_size = tgt_size

    def __call__(self, img, mask):
        """Pad ``img`` on the bottom/right and build a matching padding mask.

        Args:
            img: Array of shape (channels, height, width).
            mask: Accepted for interface compatibility; its contents are not
                read — a fresh mask is always generated.

        Returns:
            ``(padded_image, padding_mask)``, both float32. The mask is 0 over
            the region occupied by the image and 1 over the padded region.
        """
        channels, height, width = img.shape
        side = self.tgt_size

        padded = np.zeros((channels, side, side), dtype=np.float32)
        padded[:, :height, :width] = img

        pad_mask = np.ones((side, side), dtype=np.float32)
        pad_mask[:height, :width] = 0
        return padded, pad_mask
-
-
class Normalize:
    """Per-channel standardisation followed by an HWC -> CHW transpose."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, image):
        """Return ``(image - mean) / std`` with axes reordered to (2, 0, 1).

        Args:
            image: Array with channels on the last axis, so that ``mean`` and
                ``std`` broadcast across it.

        Returns:
            The normalised array in channel-first layout.
        """
        centered = image - self.mean
        scaled = centered / self.std
        return scaled.transpose(2, 0, 1)
-
-
def save_img(image, name):
    """Write an RGB image to ``<name>.jpg``.

    Args:
        image: RGB image array; it is converted to BGR channel order and then
            cast to uint8 before being written.
        name: Output path without extension; ``'.jpg'`` is appended.
    """
    bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR).astype(np.uint8)
    cv2.imwrite(name + '.jpg', bgr)
-
-
def save_result(image, scores, bboxes, labels, score_thresh=0.9):
    """Draw confident detections on ``image`` and save it as ``result_img.jpg``.

    Args:
        image: RGB image array to draw on (passed to ``cv2.rectangle`` /
            ``cv2.putText`` and finally to ``save_img``).
        scores: Tensor of per-query confidence scores (must provide
            ``.asnumpy()``).
        bboxes: Tensor of per-query boxes as (x0, y0, x1, y1), in the pixel
            coordinates of ``image``.
        labels: Tensor of per-query class ids, used as keys into
            ``coco_id_dict``.
        score_thresh: Only detections whose score is strictly greater than
            this value are drawn. Defaults to 0.9.
    """
    # Flatten explicitly instead of np.squeeze: squeeze collapses a
    # single-query result to 0-d arrays, which cannot be iterated by zip().
    scores = scores.asnumpy().reshape(-1)
    bboxes = bboxes.asnumpy().reshape(-1, 4)
    labels = labels.asnumpy().reshape(-1)

    for score, bbox, label in zip(scores, bboxes, labels):
        if score > score_thresh:
            x0, y0, x1, y1 = (int(coord) for coord in bbox)
            class_name = coco_id_dict[label]
            print(f'[{x0},{y0},{x1},{y1}]==[{class_name}]==[{score}]')
            image = cv2.rectangle(image, (x0, y0), (x1, y1), (0, 0, 255), 2)
            image = cv2.putText(
                image, class_name, (x0, y0),
                cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 1,
            )
    save_img(image, 'result_img')
-
-
def build_net(args):
    """Assemble a DETR model from the configuration in ``args``.

    Args:
        args: Configuration object providing ``num_classes``, ``num_queries``
            and ``aux_loss``; it is also handed to ``build_backbone`` and
            ``build_transformer``.

    Returns:
        The constructed ``DETR`` instance.
    """
    class_count = args.num_classes
    feature_extractor = build_backbone(args)
    encoder_decoder = build_transformer(args)
    return DETR(
        feature_extractor,
        encoder_decoder,
        num_classes=class_count,
        num_queries=args.num_queries,
        aux_loss=args.aux_loss,
    )
-
-
def detect_image(image_path='demo.jpg', ckpt_path='ms_detr_sota.ckpt'):
    """Run DETR on a single image and save the annotated result.

    The pipeline is: build the network, load the checkpoint, read and
    preprocess the image (resize -> normalise -> pad to 1344x1344), run a
    forward pass, convert the predictions to pixel-space boxes, and hand them
    to ``save_result``.

    Args:
        image_path: Path of the input image. Defaults to ``'demo.jpg'``.
        ckpt_path: Path of the MindSpore checkpoint. Defaults to
            ``'ms_detr_sota.ckpt'``.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read by OpenCV.
    """
    # config
    args = prepare_args()
    args.aux_loss = False

    # NOTE: the device is forced to CPU here; args.device_target and
    # args.device_id are not consulted.
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")

    # build model and load checkpoint
    net = build_net(args)
    net.set_train(False)
    ckpt = load_checkpoint(ckpt_path)
    # Strip training-wrapper prefixes so parameter names match the bare net.
    new_ckpt = {
        key.replace('optimizer.', '').replace('network.net.', ''): value
        for key, value in ckpt.items()
    }
    load_param_into_net(net, new_ckpt, strict_load=True)

    # load image
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {image_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ori_img = img

    # transform
    h, w, _ = img.shape
    print(f'ori size: {h},{w}')

    trans1 = SOTAResize(800, 1333)
    # Mean/std equal the usual ImageNet statistics scaled to the 0-255 range.
    trans2 = Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
    trans3 = SOTAPad(1344)

    img, mask, _ = trans1(img)
    img = trans2(img)
    img, mask = trans3(img, mask)

    # build model input tensor_list (add a leading batch dimension of 1)
    tensor = ms.Tensor(img[None], dtype=ms.float32)
    mask = ms.Tensor(mask[None], dtype=ms.float32)

    # forward
    print(tensor.shape)
    print(mask.shape)
    pred_logits, pred_boxes = net(tensor, mask)
    print(pred_logits.shape)
    print(pred_boxes.shape)

    # post process
    prob = nn.Softmax()(pred_logits)
    # The last class column is excluded before taking the arg-max
    # (presumably the "no object" class — confirm against the DETR head).
    labels, scores = ops.ArgMaxWithValue(axis=-1)(prob[..., :-1])
    boxes = box_cxcywh_to_xyxy(pred_boxes)
    # Scale by the ORIGINAL image size; assumes pred_boxes are normalised
    # relative to the un-padded image — TODO confirm.
    scale_fct = ms.Tensor([w, h, w, h], dtype=ms.float32)
    boxes = boxes * scale_fct[None, None, :]

    # save result
    save_result(ori_img, scores, boxes, labels)
-
-
# Script entry point: run the single-image demo only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    detect_image()
|