import os
import sys

import numpy as np
import torch
import torch.nn as nn
import torchvision
from torch.nn.init import normal_, constant_

sys.path.append(os.path.join(os.getcwd(), "alg_manager/CongestionLevel"))

from transforms.spatial_transforms import *
from ops.basic_ops import ConsensusModule, Identity
from ops.layers import PA_layer, VIP_layer
# from dataloader import CLMDataSet


class VideoRecord(object):
    def __init__(self, row):
        self._data = row

    @property
    def path(self):
        return self._data[0]

    @property
    def num_frames(self):
        return int(self._data[1])


class CongestionLevel(nn.Module):
    def __init__(self, PCL_mode=True, checkpoint_path=None, video_root='./videos/', image_prefix='img_',
                 batch_size=16, num_workers=8):
        super(CongestionLevel, self).__init__()
        self.batch_size = batch_size
        self.checkpoint_path = checkpoint_path
        self.modality = 'RGB'
        self.num_segments = 8
        self.data_length = 1
        self.num_frames = 8
        self.num_class = 5
        self.num_workers = num_workers
        self.reshape = True
        self.before_softmax = True
        self.dropout = 0.8
        self.crop_num = 1
        self.consensus_type = 'avg'
        self.pretrain = "kinetics"
        self.freeze_base_model = True
        self.PCL_mode = PCL_mode
        self.wo_loader = True
        self.video_root = video_root
        self.image_prefix = image_prefix
        self.image_tmpl = self.image_prefix + '{:03d}.jpg'
        # if not before_softmax and consensus_type != 'avg':
        #     raise ValueError("Only avg consensus can be used after Softmax")
        self.new_length = 1 if self.modality in ("RGB", "PA") else 5
        base_model = 'BNInception'

        self._prepare_base_model(base_model)
        self._init_weights()
        # feature_dim = self._prepare_tsn(num_class)

        if self.modality == 'Flow':
            print("Converting the ImageNet model to a flow init model")
            self.base_model = self._construct_flow_model(self.base_model)
            print("Done. Flow model ready...")
        elif self.modality == 'RGBDiff':
            print("Converting the ImageNet model to an RGB+Diff init model")
            self.base_model = self._construct_diff_model(self.base_model)
            print("Done. RGBDiff model ready.")

        self.consensus = ConsensusModule(self.consensus_type)

        if not self.before_softmax:
            print("Using Softmax...")
            self.softmax = nn.Softmax(dim=1)

        self._enable_pbn = False

        if self.PCL_mode:
            # Center-crop evaluation pipeline: scale the short side, center-crop,
            # stack the frame group into one tensor (BGR roll for BNInception),
            # and normalize with the base model's mean/std.
            normalize = GroupNormalize(self.input_mean, self.input_std)
            self.frames_transforms = torchvision.transforms.Compose([
                GroupScale(self.input_size * 256 // 224),
                GroupCenterCrop(self.input_size),
                Stack(roll=True),
                ToTorchFormatTensor(div=False),
                normalize
            ])
            # self._init_data_loader()

    def forward(self, input):
        sample_len = (3 if self.modality in ("RGB", "PA") else 2) * self.new_length

        if self.PCL_mode and self.wo_loader:
            # Turn the raw frame group into a single normalized tensor first.
            input = self.frames_transforms(input)
            if torch.cuda.is_available():
                input = input.cuda()

        if self.modality == 'RGBDiff':
            sample_len = 3 * self.new_length
            input = self._get_diff(input)

        if self.modality == "PA":
            base_out = self.PA(input.view((-1, sample_len) + input.size()[-2:]))
            base_out = self.base_model(base_out)
        else:
            base_out = self.base_model(input.view((-1, sample_len) + input.size()[-2:]))
        base_out = self.VIP(base_out)
        # if self.dropout > 0:
        #     base_out = self.new_fc(base_out)

        if not self.before_softmax:
            base_out = self.softmax(base_out)

        output = torch.mean(base_out, dim=1)
        if self.PCL_mode:
            # Map the averaged score onto an integer congestion level in [1, 5].
            return output.mul(self.num_class).clamp(1., 5.).round().type(torch.int).cpu().numpy()
        # output = self.consensus(base_out)
        return output, base_out

    # def _prepare_tsn(self, num_class):
    #     # feature_dim = getattr(self.base_model, self.base_model.last_layer_name).in_features
    #     feature_dim = 1024 * 7
    #     if self.dropout == 0:
    #         setattr(self.base_model, self.base_model.last_layer_name, nn.Linear(feature_dim, num_class))
    #         self.new_fc = None
    #     else:
    #         setattr(self.base_model, self.base_model.last_layer_name, nn.Dropout(p=self.dropout))
    #         self.new_fc = nn.Linear(feature_dim, num_class)
    #
    #     std = 0.001
    #     if self.new_fc is None:
    #         normal_(getattr(self.base_model, self.base_model.last_layer_name).weight, 0, std)
    #         constant_(getattr(self.base_model, self.base_model.last_layer_name).bias, 0)
    #     else:
    #         normal_(self.new_fc.weight, 0, std)
    #         constant_(self.new_fc.bias, 0)
    #     return feature_dim

    # def _init_data_loader(self):
    #     self.record_list = list()
    #
    #     if self.video_root.startswith('./'):
    #         self.video_root = '{0}/{1}'.format(os.path.dirname(os.path.realpath(__file__)),
    #                                            self.video_root.replace('./', ''))
    #
    #     record_list = [os.path.join(self.video_root, path) for path in os.listdir(self.video_root) if
    #                    os.path.isdir(os.path.join(self.video_root, path))]
    #     for path in record_list:
    #         img_list = [os.path.join(path, img) for img in os.listdir(path) if img.startswith(self.image_prefix)]
    #         assert len(img_list) == 32, \
    #             "Video segment length must be 32! Current length: {1}, current path: {0}\n".format(path, len(img_list))
    #         self.record_list.append(VideoRecord([path, len(img_list)]))
    #
    #     self.data_loader = torch.utils.data.DataLoader(
    #         CLMDataSet(self.record_list, num_segments=self.num_segments, new_length=self.new_length,
    #                    modality=self.modality, image_tmpl=self.image_tmpl,
    #                    transform=self.frames_transforms, random_shift=False),
    #         batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers, pin_memory=True)

    def _prepare_base_model(self, base_model):

        if 'resnet' in base_model or 'vgg' in base_model:
            self.base_model = getattr(torchvision.models, base_model)(pretrained=True)
            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            if self.modality == 'Flow':
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                self.input_mean = [0.485, 0.456, 0.406] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [np.mean(self.input_std) * 2] * 3 * self.new_length
        elif 'BNInception' in base_model:
            import arch
            if self.modality == "PA":
                self.PA = PA_layer(self.data_length)
            self.base_model = getattr(arch, base_model)(batch_size=self.batch_size, data_length=self.data_length,
                                                        num_frames=self.num_frames, pretrain=self.pretrain,
                                                        num_classes=self.num_class,
                                                        checkpoint_path=self.checkpoint_path)
            self.VIP = VIP_layer(self.num_frames, 1024, 1, self.dropout)
            self.input_size = 224
            self.input_mean = [104, 117, 128]
            self.input_std = [1]

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

        elif 'ECOLite' in base_model:
            import arch
            self.base_model = getattr(arch, base_model)(batch_size=self.batch_size, data_length=self.data_length,
                                                        num_frames=self.num_frames, pretrain=self.pretrain,
                                                        num_classes=self.num_class)
            self.base_model.last_layer_name = 'fc_final'
            self.input_size = 224
            self.input_mean = [104, 117, 128]
            self.input_std = [1]

            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                self.input_mean = self.input_mean * (1 + self.new_length)

        elif 'inception' in base_model:
            import arch
            self.base_model = getattr(arch, base_model)()
            self.base_model.last_layer_name = 'classif'
            self.input_size = 299
            self.input_mean = [0.5]
            self.input_std = [0.5]
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))

    def _init_weights(self):
        # Check for a missing path before calling string methods on it.
        if not self.checkpoint_path:
            print("=> no checkpoint path given, skipping weight init")
            return
        if self.checkpoint_path.startswith('./'):
            self.checkpoint_path = '{0}/{1}'.format(os.path.dirname(os.path.realpath(__file__)),
                                                    self.checkpoint_path.replace('./', ''))
        if not os.path.isfile(self.checkpoint_path):
            print("=> no checkpoint found at '{}'".format(self.checkpoint_path))
            return

        print("=> loading checkpoint '{}'".format(self.checkpoint_path))
        checkpoint = torch.load(self.checkpoint_path, map_location='cpu')
        model_dict = self.state_dict()
        # Strip the 'module.' prefix left by nn.DataParallel checkpoints.
        new_state_dict = {k[7:]: v for k, v in checkpoint['state_dict'].items() if k[7:] in model_dict}
        un_init_dict_keys = [k for k in model_dict.keys() if k not in new_state_dict]
        print("un_init_dict_keys: ", un_init_dict_keys)
        for k in un_init_dict_keys:
            new_state_dict[k] = torch.zeros_like(model_dict[k])
        self.load_state_dict(new_state_dict)

        if self.modality == 'PA':
            for name, param in self.PA.named_parameters():
                param.requires_grad = False
        for name, param in self.base_model.named_parameters():
            param.requires_grad = False
        for name, param in self.VIP.named_parameters():
            param.requires_grad = False

    def train(self, mode=True):
        """
        Override the default train() to freeze the BN parameters
        :return:
        """
        super(CongestionLevel, self).train(mode)
        count = 0
        if self._enable_pbn:
            print("Freezing BatchNorm2D except the first one.")
            for m in self.base_model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    count += 1
                    if count >= (2 if self._enable_pbn else 1):
                        m.eval()
                        # shut down updates in frozen mode
                        m.weight.requires_grad = False
                        m.bias.requires_grad = False

    def partialBN(self, enable):
        self._enable_pbn = enable
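
    # Usage sketch (call order inferred from the flag above, not documented in
    # this file):
    #
    #   model.partialBN(True)
    #   model.train()
    #
    # With _enable_pbn set, train() freezes every BatchNorm2d in the base
    # model except the first one.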

    def get_optim_policies(self):
        first_conv_weight = []
        first_conv_bias = []
        normal_weight = []
        normal_bias = []
        bn = []

        conv_cnt = 0
        bn_cnt = 0
        for m in self.modules():
            if isinstance(m, (torch.nn.Conv2d, torch.nn.Conv1d, torch.nn.Conv3d)):
                ps = list(m.parameters())
                conv_cnt += 1
                if conv_cnt == 1:
                    first_conv_weight.append(ps[0])
                    if len(ps) == 2:
                        first_conv_bias.append(ps[1])
                else:
                    normal_weight.append(ps[0])
                    if len(ps) == 2:
                        normal_bias.append(ps[1])
            elif isinstance(m, torch.nn.Linear):
                ps = list(m.parameters())
                normal_weight.append(ps[0])
                if len(ps) == 2:
                    normal_bias.append(ps[1])
            elif isinstance(m, torch.nn.BatchNorm1d):
                bn.extend(list(m.parameters()))
            elif isinstance(m, (torch.nn.BatchNorm2d, torch.nn.BatchNorm3d)):
                bn_cnt += 1
                # later BNs are frozen
                if not self._enable_pbn or bn_cnt == 1:
                    bn.extend(list(m.parameters()))
            elif len(m._modules) == 0:
                if len(list(m.parameters())) > 0:
                    raise ValueError("New atomic module type: {}. Need to give it a learning policy".format(type(m)))

        return [
            {'params': first_conv_weight, 'lr_mult': 5 if self.modality == 'PA' else 1, 'decay_mult': 1,
             'name': "first_conv_weight"},
            {'params': first_conv_bias, 'lr_mult': 10 if self.modality == 'PA' else 2, 'decay_mult': 0,
             'name': "first_conv_bias"},
            {'params': normal_weight, 'lr_mult': 1, 'decay_mult': 1,
             'name': "normal_weight"},
            {'params': normal_bias, 'lr_mult': 2, 'decay_mult': 0,
             'name': "normal_bias"},
            {'params': bn, 'lr_mult': 1, 'decay_mult': 0,
             'name': "BN scale/shift"},
        ]
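
    # Example (a sketch; base_lr and base_wd are hypothetical names): torch
    # optimizers ignore the custom 'lr_mult' / 'decay_mult' keys, so the
    # caller is expected to fold them into per-group 'lr' / 'weight_decay':
    #
    #   policies = model.get_optim_policies()
    #   for group in policies:
    #       group['lr'] = base_lr * group['lr_mult']
    #       group['weight_decay'] = base_wd * group['decay_mult']
    #   optimizer = torch.optim.SGD(policies, lr=base_lr, momentum=0.9)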

    def _get_diff(self, input, keep_rgb=False):
        input_c = 3 if self.modality in ["RGB", "PA", "RGBDiff"] else 2
        input_view = input.view((-1, self.num_segments, self.new_length + 1, input_c,) + input.size()[2:])
        if keep_rgb:
            new_data = input_view.clone()
        else:
            new_data = input_view[:, :, 1:, :, :, :].clone()

        for x in reversed(list(range(1, self.new_length + 1))):
            if keep_rgb:
                new_data[:, :, x, :, :, :] = input_view[:, :, x, :, :, :] - input_view[:, :, x - 1, :, :, :]
            else:
                new_data[:, :, x - 1, :, :, :] = input_view[:, :, x, :, :, :] - input_view[:, :, x - 1, :, :, :]

        return new_data
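
    # Shape sketch (assuming num_segments=8 and the RGBDiff new_length of 5,
    # since _get_diff is only reached for RGBDiff input): a tensor of shape
    # (N, 8 * 6 * 3, H, W) is viewed as (N, 8, 6, 3, H, W), and the result is
    # (N, 8, 5, 3, H, W), holding frame t minus frame t-1 within each segment
    # (with the original RGB frames kept in front when keep_rgb=True).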

    def _construct_flow_model(self, base_model):
        # modify the convolution layers
        # Torch models are usually defined in a hierarchical way;
        # nn.Module.modules() returns all submodules in a DFS manner
        modules = list(self.base_model.modules())
        first_conv_idx = list(filter(lambda x: isinstance(modules[x], nn.Conv2d), list(range(len(modules)))))[0]
        conv_layer = modules[first_conv_idx]
        container = modules[first_conv_idx - 1]

        # modify parameters, assuming the first blob contains the convolution kernels
        params = [x.clone() for x in conv_layer.parameters()]
        kernel_size = params[0].size()
        new_kernel_size = kernel_size[:1] + (2 * self.new_length,) + kernel_size[2:]
        # average the RGB kernels and replicate the mean across the flow channels
        new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()

        new_conv = nn.Conv2d(2 * self.new_length, conv_layer.out_channels,
                             conv_layer.kernel_size, conv_layer.stride, conv_layer.padding,
                             bias=True if len(params) == 2 else False)
        new_conv.weight.data = new_kernels
        if len(params) == 2:
            new_conv.bias.data = params[1].data  # add bias if necessary
        layer_name = list(container.state_dict().keys())[0][:-7]  # remove .weight suffix to get the layer name

        # replace the first convolution layer
        setattr(container, layer_name, new_conv)
        return base_model

    def _construct_diff_model(self, base_model, keep_rgb=False):
        # modify the convolution layers
        # Torch models are usually defined in a hierarchical way;
        # nn.Module.modules() returns all submodules in a DFS manner
        modules = list(self.base_model.modules())
        first_conv_idx = list(filter(lambda x: isinstance(modules[x], nn.Conv2d), list(range(len(modules)))))[0]
        conv_layer = modules[first_conv_idx]
        container = modules[first_conv_idx - 1]

        # modify parameters, assuming the first blob contains the convolution kernels
        params = [x.clone() for x in conv_layer.parameters()]
        kernel_size = params[0].size()
        if not keep_rgb:
            new_kernel_size = kernel_size[:1] + (3 * self.new_length,) + kernel_size[2:]
            new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()
        else:
            new_kernel_size = kernel_size[:1] + (3 * self.new_length,) + kernel_size[2:]
            new_kernels = torch.cat(
                (params[0].data, params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()),
                1)
            new_kernel_size = kernel_size[:1] + (3 + 3 * self.new_length,) + kernel_size[2:]

        new_conv = nn.Conv2d(new_kernel_size[1], conv_layer.out_channels,
                             conv_layer.kernel_size, conv_layer.stride, conv_layer.padding,
                             bias=True if len(params) == 2 else False)
        new_conv.weight.data = new_kernels
        if len(params) == 2:
            new_conv.bias.data = params[1].data  # add bias if necessary
        layer_name = list(container.state_dict().keys())[0][:-7]  # remove .weight suffix to get the layer name

        # replace the first convolution layer
        setattr(container, layer_name, new_conv)
        return base_model

    @property
    def crop_size(self):
        return self.input_size

    @property
    def scale_size(self):
        return self.input_size * 256 // 224

    def get_augmentation(self):
        if self.modality in ('RGB', 'PA'):
            return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75, .66]),
                                                   GroupRandomHorizontalFlip(is_flow=False)])
        elif self.modality == 'Flow':
            return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75]),
                                                   GroupRandomHorizontalFlip(is_flow=True)])
        elif self.modality == 'RGBDiff':
            return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75]),
                                                   GroupRandomHorizontalFlip(is_flow=False)])
        raise ValueError('No augmentation defined for modality: {}'.format(self.modality))
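

if __name__ == "__main__":
    # Smoke-test sketch. The checkpoint and frame paths are hypothetical
    # placeholders; in PCL mode the model takes a group of 8 RGB PIL frames,
    # runs frames_transforms on them, and returns integer congestion levels
    # in [1, 5] as a numpy array.
    from PIL import Image

    model = CongestionLevel(PCL_mode=True, checkpoint_path='./checkpoint.pth.tar')
    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()
    frames = [Image.open('videos/demo/img_{:03d}.jpg'.format(i + 1)).convert('RGB')
              for i in range(8)]
    with torch.no_grad():
        levels = model(frames)
    print('predicted congestion levels:', levels)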