import os
import sys

import numpy as np
import torch
import torch.nn as nn
import torchvision
from torch.nn.init import normal_, constant_

sys.path.append(os.path.join(os.getcwd(), "alg_manager/CongestionLevel"))

from transforms.spatial_transforms import *
from ops.basic_ops import ConsensusModule, Identity
from ops.layers import PA_layer, VIP_layer
# from dataloader import CLMDataSet


class VideoRecord(object):
    def __init__(self, row):
        self._data = row

    @property
    def path(self):
        return self._data[0]

    @property
    def num_frames(self):
        return int(self._data[1])


class CongestionLevel(nn.Module):
    def __init__(self, PCL_mode=True, checkpoint_path=None, video_root='./videos/', image_prefix='img_',
                 batch_size=16, num_workers=8):
        super(CongestionLevel, self).__init__()
        self.batch_size = batch_size
        self.checkpoint_path = checkpoint_path
        self.modality = 'RGB'
        self.num_segments = 8
        self.data_length = 1
        self.num_frames = 8
        self.num_class = 5
        self.num_workers = num_workers
        self.reshape = True
        self.before_softmax = True
        self.dropout = 0.8
        self.crop_num = 1
        self.consensus_type = 'avg'
        self.pretrain = "kinetics"
        self.freeze_base_model = True
        self.PCL_mode = PCL_mode
        self.wo_loader = True
        self.video_root = video_root
        self.image_prefix = image_prefix
        self.image_tmpl = self.image_prefix + '{:03d}.jpg'
        # if not before_softmax and consensus_type != 'avg':
        #     raise ValueError("Only avg consensus can be used after Softmax")
        self.new_length = 1 if self.modality in ("RGB", "PA") else 5
        base_model = 'BNInception'

        self._prepare_base_model(base_model)
        self._init_weights()
        # feature_dim = self._prepare_tsn(num_class)

        if self.modality == 'Flow':
            print("Converting the ImageNet model to a flow init model")
            self.base_model = self._construct_flow_model(self.base_model)
            print("Done. Flow model ready...")
        elif self.modality == 'RGBDiff':
            print("Converting the ImageNet model to an RGB+Diff init model")
            self.base_model = self._construct_diff_model(self.base_model)
            print("Done. RGBDiff model ready.")

        self.consensus = ConsensusModule(self.consensus_type)

        if not self.before_softmax:
            print("Using Softmax...")
            self.softmax = nn.Softmax(dim=1)

        self._enable_pbn = False

        if self.PCL_mode:
            # Center-crop evaluation pipeline: scale the short side, center-crop,
            # stack the frame group into one tensor (BGR roll for BNInception),
            # and normalize with the base model's mean/std.
            normalize = GroupNormalize(self.input_mean, self.input_std)
            self.frames_transforms = torchvision.transforms.Compose([
                GroupScale(self.input_size * 256 // 224),
                GroupCenterCrop(self.input_size),
                Stack(roll=True),
                ToTorchFormatTensor(div=False),
                normalize
            ])
            # self._init_data_loader()

    def forward(self, input):
        sample_len = (3 if self.modality in ("RGB", "PA") else 2) * self.new_length

        if self.PCL_mode and self.wo_loader:
            # Turn the raw frame group into a single normalized tensor first.
            input = self.frames_transforms(input)
            if torch.cuda.is_available():
                input = input.cuda()

        if self.modality == 'RGBDiff':
            sample_len = 3 * self.new_length
            input = self._get_diff(input)

        if self.modality == "PA":
            base_out = self.PA(input.view((-1, sample_len) + input.size()[-2:]))
            base_out = self.base_model(base_out)
        else:
            base_out = self.base_model(input.view((-1, sample_len) + input.size()[-2:]))
        base_out = self.VIP(base_out)
        # if self.dropout > 0:
        #     base_out = self.new_fc(base_out)

        if not self.before_softmax:
            base_out = self.softmax(base_out)

        output = torch.mean(base_out, dim=1)
        if self.PCL_mode:
            # Map the averaged score onto an integer congestion level in [1, 5].
            return output.mul(self.num_class).clamp(1., 5.).round().type(torch.int).cpu().numpy()
        # output = self.consensus(base_out)
        return output, base_out

    # def _prepare_tsn(self, num_class):
    #     # feature_dim = getattr(self.base_model, self.base_model.last_layer_name).in_features
    #     feature_dim = 1024 * 7
    #     if self.dropout == 0:
    #         setattr(self.base_model, self.base_model.last_layer_name, nn.Linear(feature_dim, num_class))
    #         self.new_fc = None
    #     else:
    #         setattr(self.base_model, self.base_model.last_layer_name, nn.Dropout(p=self.dropout))
    #         self.new_fc = nn.Linear(feature_dim, num_class)
    #
    #     std = 0.001
    #     if self.new_fc is None:
    #         normal_(getattr(self.base_model, self.base_model.last_layer_name).weight, 0, std)
    #         constant_(getattr(self.base_model, self.base_model.last_layer_name).bias, 0)
    #     else:
    #         normal_(self.new_fc.weight, 0, std)
    #         constant_(self.new_fc.bias, 0)
    #     return feature_dim

    # def _init_data_loader(self):
    #     self.record_list = list()
    #
    #     if self.video_root.startswith('./'):
    #         self.video_root = '{0}/{1}'.format(os.path.dirname(os.path.realpath(__file__)),
    #                                            self.video_root.replace('./', ''))
    #
    #     record_list = [os.path.join(self.video_root, path) for path in os.listdir(self.video_root) if
    #                    os.path.isdir(os.path.join(self.video_root, path))]
    #     for path in record_list:
    #         img_list = [os.path.join(path, img) for img in os.listdir(path) if img.startswith(self.image_prefix)]
    #         assert len(img_list) == 32, \
    #             "Video segment length must be 32! Current length: {1}, current path: {0}\n".format(path, len(img_list))
    #         self.record_list.append(VideoRecord([path, len(img_list)]))
    #
    #     self.data_loader = torch.utils.data.DataLoader(
    #         CLMDataSet(self.record_list, num_segments=self.num_segments, new_length=self.new_length,
    #                    modality=self.modality, image_tmpl=self.image_tmpl,
    #                    transform=self.frames_transforms, random_shift=False),
    #         batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, pin_memory=True)

    def _prepare_base_model(self, base_model):

        if 'resnet' in base_model or 'vgg' in base_model:
            self.base_model = getattr(torchvision.models, base_model)(pretrained=True)
            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [np.mean(self.input_std) * 2] * 3 * self.new_length
        elif 'BNInception' in base_model:
            import arch
            if self.modality == "PA":
                self.PA = PA_layer(self.data_length)
            self.base_model = getattr(arch, base_model)(batch_size=self.batch_size, data_length=self.data_length,
                                                        num_frames=self.num_frames, pretrain=self.pretrain,
                                                        num_classes=self.num_class,
                                                        checkpoint_path=self.checkpoint_path)
            self.VIP = VIP_layer(self.num_frames, 1024, 1, self.dropout)
            self.input_size = 224
            self.input_mean = [104, 117, 128]
            self.input_std = [1]

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

        elif 'ECOLite' in base_model:
            import arch
            self.base_model = getattr(arch, base_model)(batch_size=self.batch_size, data_length=self.data_length,
                                                        num_frames=self.num_frames, pretrain=self.pretrain,
                                                        num_classes=self.num_class)
            self.base_model.last_layer_name = 'fc_final'
            self.input_size = 224
            self.input_mean = [104, 117, 128]
            self.input_std = [1]

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

        elif 'inception' in base_model:
            import arch
            self.base_model = getattr(arch, base_model)()
            self.base_model.last_layer_name = 'classif'
            self.input_size = 299
            self.input_mean = [0.5]
            self.input_std = [0.5]
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))

    def _init_weights(self):
        # Check for a missing path before calling string methods on it.
        if not self.checkpoint_path:
            print("=> no checkpoint path given, skipping weight init")
            return
        if self.checkpoint_path.startswith('./'):
            self.checkpoint_path = '{0}/{1}'.format(os.path.dirname(os.path.realpath(__file__)),
                                                    self.checkpoint_path.replace('./', ''))
        if not os.path.isfile(self.checkpoint_path):
            print("=> no checkpoint found at '{}'".format(self.checkpoint_path))
            return

        print("=> loading checkpoint '{}'".format(self.checkpoint_path))
        checkpoint = torch.load(self.checkpoint_path, map_location='cpu')
        model_dict = self.state_dict()
        # Strip the 'module.' prefix left by nn.DataParallel checkpoints.
        new_state_dict = {k[7:]: v for k, v in checkpoint['state_dict'].items() if k[7:] in model_dict}
        un_init_dict_keys = [k for k in model_dict.keys() if k not in new_state_dict]
        print("un_init_dict_keys: ", un_init_dict_keys)
        for k in un_init_dict_keys:
            new_state_dict[k] = torch.zeros_like(model_dict[k])
        self.load_state_dict(new_state_dict)

        if self.modality == 'PA':
            for name, param in self.PA.named_parameters():
                param.requires_grad = False
        for name, param in self.base_model.named_parameters():
            param.requires_grad = False
        for name, param in self.VIP.named_parameters():
            param.requires_grad = False

    def train(self, mode=True):
        """
        Override the default train() to freeze the BN parameters
        :return:
        """
        super(CongestionLevel, self).train(mode)
        count = 0
        if self._enable_pbn:
            print("Freezing BatchNorm2D except the first one.")
            for m in self.base_model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    count += 1
                    if count >= (2 if self._enable_pbn else 1):
                        m.eval()
                        # shut down updates in frozen mode
                        m.weight.requires_grad = False
                        m.bias.requires_grad = False

    def partialBN(self, enable):
        self._enable_pbn = enable
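
    # Usage sketch (call order inferred from the flag above, not documented in
    # this file):
    #
    #   model.partialBN(True)
    #   model.train()
    #
    # With _enable_pbn set, train() freezes every BatchNorm2d in the base
    # model except the first one.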

    def get_optim_policies(self):
        first_conv_weight = []
        first_conv_bias = []
        normal_weight = []
        normal_bias = []
        bn = []

        conv_cnt = 0
        bn_cnt = 0
        for m in self.modules():
            if isinstance(m, (torch.nn.Conv2d, torch.nn.Conv1d, torch.nn.Conv3d)):
                ps = list(m.parameters())
                conv_cnt += 1
                if conv_cnt == 1:
                    first_conv_weight.append(ps[0])
                    if len(ps) == 2:
                        first_conv_bias.append(ps[1])
                else:
                    normal_weight.append(ps[0])
                    if len(ps) == 2:
                        normal_bias.append(ps[1])
            elif isinstance(m, torch.nn.Linear):
                ps = list(m.parameters())
                normal_weight.append(ps[0])
                if len(ps) == 2:
                    normal_bias.append(ps[1])
            elif isinstance(m, torch.nn.BatchNorm1d):
                bn.extend(list(m.parameters()))
            elif isinstance(m, (torch.nn.BatchNorm2d, torch.nn.BatchNorm3d)):
                bn_cnt += 1
                # later BNs are frozen
                if not self._enable_pbn or bn_cnt == 1:
                    bn.extend(list(m.parameters()))
            elif len(m._modules) == 0:
                if len(list(m.parameters())) > 0:
                    raise ValueError("New atomic module type: {}. Need to give it a learning policy".format(type(m)))

        return [
            {'params': first_conv_weight, 'lr_mult': 5 if self.modality == 'PA' else 1, 'decay_mult': 1,
             'name': "first_conv_weight"},
            {'params': first_conv_bias, 'lr_mult': 10 if self.modality == 'PA' else 2, 'decay_mult': 0,
             'name': "first_conv_bias"},
            {'params': normal_weight, 'lr_mult': 1, 'decay_mult': 1,
             'name': "normal_weight"},
            {'params': normal_bias, 'lr_mult': 2, 'decay_mult': 0,
             'name': "normal_bias"},
            {'params': bn, 'lr_mult': 1, 'decay_mult': 0,
             'name': "BN scale/shift"},
        ]
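
    # Example (a sketch; base_lr and base_wd are hypothetical names): torch
    # optimizers ignore the custom 'lr_mult' / 'decay_mult' keys, so the
    # caller is expected to fold them into per-group 'lr' / 'weight_decay':
    #
    #   policies = model.get_optim_policies()
    #   for group in policies:
    #       group['lr'] = base_lr * group['lr_mult']
    #       group['weight_decay'] = base_wd * group['decay_mult']
    #   optimizer = torch.optim.SGD(policies, lr=base_lr, momentum=0.9)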

    def _get_diff(self, input, keep_rgb=False):
        input_c = 3 if self.modality in ["RGB", "PA", "RGBDiff"] else 2
        input_view = input.view((-1, self.num_segments, self.new_length + 1, input_c,) + input.size()[2:])
        if keep_rgb:
            new_data = input_view.clone()
        else:
            new_data = input_view[:, :, 1:, :, :, :].clone()

        for x in reversed(list(range(1, self.new_length + 1))):
            if keep_rgb:
                new_data[:, :, x, :, :, :] = input_view[:, :, x, :, :, :] - input_view[:, :, x - 1, :, :, :]
            else:
                new_data[:, :, x - 1, :, :, :] = input_view[:, :, x, :, :, :] - input_view[:, :, x - 1, :, :, :]

        return new_data
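
    # Shape sketch (assuming num_segments=8 and the RGBDiff new_length of 5,
    # since _get_diff is only reached for RGBDiff input): a tensor of shape
    # (N, 8 * 6 * 3, H, W) is viewed as (N, 8, 6, 3, H, W), and the result is
    # (N, 8, 5, 3, H, W), holding frame t minus frame t-1 within each segment
    # (with the original RGB frames kept in front when keep_rgb=True).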

    def _construct_flow_model(self, base_model):
        # modify the convolution layers
        # Torch models are usually defined in a hierarchical way;
        # nn.Module.modules() returns all submodules in a DFS manner
        modules = list(self.base_model.modules())
        first_conv_idx = list(filter(lambda x: isinstance(modules[x], nn.Conv2d), list(range(len(modules)))))[0]
        conv_layer = modules[first_conv_idx]
        container = modules[first_conv_idx - 1]

        # modify parameters, assuming the first blob contains the convolution kernels
        params = [x.clone() for x in conv_layer.parameters()]
        kernel_size = params[0].size()
        new_kernel_size = kernel_size[:1] + (2 * self.new_length,) + kernel_size[2:]
        # average the RGB kernels and replicate the mean across the flow channels
        new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()

        new_conv = nn.Conv2d(2 * self.new_length, conv_layer.out_channels,
                             conv_layer.kernel_size, conv_layer.stride, conv_layer.padding,
                             bias=True if len(params) == 2 else False)
        new_conv.weight.data = new_kernels
        if len(params) == 2:
            new_conv.bias.data = params[1].data  # add bias if necessary
        layer_name = list(container.state_dict().keys())[0][:-7]  # remove .weight suffix to get the layer name

        # replace the first convolution layer
        setattr(container, layer_name, new_conv)
        return base_model

    def _construct_diff_model(self, base_model, keep_rgb=False):
        # modify the convolution layers
        # Torch models are usually defined in a hierarchical way;
        # nn.Module.modules() returns all submodules in a DFS manner
        modules = list(self.base_model.modules())
        first_conv_idx = list(filter(lambda x: isinstance(modules[x], nn.Conv2d), list(range(len(modules)))))[0]
        conv_layer = modules[first_conv_idx]
        container = modules[first_conv_idx - 1]

        # modify parameters, assuming the first blob contains the convolution kernels
        params = [x.clone() for x in conv_layer.parameters()]
        kernel_size = params[0].size()
        if not keep_rgb:
            new_kernel_size = kernel_size[:1] + (3 * self.new_length,) + kernel_size[2:]
            new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()
        else:
            new_kernel_size = kernel_size[:1] + (3 * self.new_length,) + kernel_size[2:]
            new_kernels = torch.cat(
                (params[0].data, params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()),
                1)
            new_kernel_size = kernel_size[:1] + (3 + 3 * self.new_length,) + kernel_size[2:]

        new_conv = nn.Conv2d(new_kernel_size[1], conv_layer.out_channels,
                             conv_layer.kernel_size, conv_layer.stride, conv_layer.padding,
                             bias=True if len(params) == 2 else False)
        new_conv.weight.data = new_kernels
        if len(params) == 2:
            new_conv.bias.data = params[1].data  # add bias if necessary
        layer_name = list(container.state_dict().keys())[0][:-7]  # remove .weight suffix to get the layer name

        # replace the first convolution layer
        setattr(container, layer_name, new_conv)
        return base_model

    @property
    def crop_size(self):
        return self.input_size

    @property
    def scale_size(self):
        return self.input_size * 256 // 224

    def get_augmentation(self):
        if self.modality in ('RGB', 'PA'):
            return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75, .66]),
                                                   GroupRandomHorizontalFlip(is_flow=False)])
        elif self.modality == 'Flow':
            return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75]),
                                                   GroupRandomHorizontalFlip(is_flow=True)])
        elif self.modality == 'RGBDiff':
            return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75]),
                                                   GroupRandomHorizontalFlip(is_flow=False)])
        raise ValueError('No augmentation defined for modality: {}'.format(self.modality))
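

if __name__ == "__main__":
    # Smoke-test sketch. The checkpoint and frame paths are hypothetical
    # placeholders; in PCL mode the model takes a group of 8 RGB PIL frames,
    # runs frames_transforms on them, and returns integer congestion levels
    # in [1, 5] as a numpy array.
    from PIL import Image

    model = CongestionLevel(PCL_mode=True, checkpoint_path='./checkpoint.pth.tar')
    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()
    frames = [Image.open('videos/demo/img_{:03d}.jpg'.format(i + 1)).convert('RGB')
              for i in range(8)]
    with torch.no_grad():
        levels = model(frames)
    print('predicted congestion levels:', levels)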