|
- # 实现 splitted 训练 并输出结果
-
- # 固定 seed 值(只能保证误差较小)===================== 不使用固定seed可以大大加快训练速度,但最终结果会有3%左右的波动
- # model.fit() 中的 shuffle=False,worker=1
-
- # SEED = 1 # 固定随机值
- import os
- # os.environ['PYTHONHASHSEED'] = str(SEED) # 固定随机值
-
- import random
- # random.seed(SEED) # 固定随机值
-
- import tensorflow
- # tensorflow.random.set_seed(SEED) # 固定随机值
-
- import numpy as np
- # np.random.seed(SEED) # 固定随机值
- # os.environ['TF_DETERMINISTIC_OPS'] = '1' # 固定随机值(需要安装 tensorflow-determinism 才有效)
- #===================================
-
- import keras
- import time
- import h5py
- import yaml
- import pickle
- import pandas as pd
-
- os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-
- from keras import backend as K
- import threading
-
- # 用于 inference
- import pandas as pd
- from sklearn.metrics import log_loss
- from sklearn.metrics import confusion_matrix
-
- from tensorflow.keras.layers import Dense, Dropout, multiply, Reshape, Conv1D
- import math
-
- from tensorflow.keras.optimizers import SGD
- from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, GlobalAveragePooling2D
- from tensorflow.keras.layers import AveragePooling2D, Input, concatenate, Lambda
- from tensorflow.keras.regularizers import l2
- from tensorflow.keras.models import Model
-
- print("tensorflow version = ",tensorflow.__version__)
-
#=========================================================================================================#
# Dataset locations (DCASE-style tab-separated meta CSVs).
data_path = '/dataset'
train_csv = data_path + '/train_all.csv'
val_csv = data_path + '/fold1_evaluate.csv'

# Directory holding the pre-extracted log-mel feature pickles.
feat_path = '/dataset/features'

# Directory where model .h5 checkpoints are saved.
savedir = '/model/Chen'
if not os.path.exists(savedir):
    os.makedirs(savedir)

# Path of the saved best-model checkpoint (read back for inference).
model_path = savedir + "/bestModel.h5"

sr = 44100 # audio sample rate in Hz (44.1 kHz; the original comment said "48KHz")
num_audio_channels = 1 # mono input
duration = 10 # clip length in seconds

#log-mel spectrogram parameters
num_freq_bin = 128 # number of mel filters -> 128 frequency bins
num_fft = 2048 # STFT size / window length
hop_length = int(num_fft/2) # hop of 1024 samples between frames
num_time_bin = 423 # number of time frames: int(np.ceil(duration * sr / hop_length))

#training parameters
num_classes = 10
max_lr = 0.1
batch_size = 32
num_epochs = 64 #TODO
mixup_alpha = 0.4
# NOTE(review): this file handle is never closed; harmless for a one-shot
# script but a context manager would be cleaner.
sample_num = len(open(train_csv, 'r').readlines()) - 1

crop_length = 0 # time-axis length after random cropping (0 disables cropping)

num_filters = 32
wd = 1e-3
output_num_filters_factor = 1
num_stacks = 4 # number of residual stacks

delta = True # compute delta/delta-delta feature channels (needs > 16 GB RAM)
focal_loss = False # whether to train with focal loss instead of categorical CE
gamma=1.0
alpha=0.3
use_split = False # whether to use the two-branch (split-frequency) model
model_selection = 0
# '''
# 0) R-GhostNet
# 1) TODO
# '''
-
- #=========================================================================================================#
- # 加载 logmel 文件,获取特征和标签
def load_data_2020(feat_path, csv_path):
    """Load pickled log-mel features and integer class labels for a meta CSV.

    Parameters
    ----------
    feat_path : directory containing one ``<clip_id>.logmel`` pickle per clip,
        each holding a dict with a ``'feat_data'`` array.
    csv_path : tab-separated meta file; the header row is skipped, column 0 is
        a wav path and the ``scene_label`` column holds the class name.

    Returns
    -------
    (features, labels) : ``features`` stacks every clip's ``feat_data`` into
        one ndarray (CSV row order); ``labels`` are pandas category codes of
        ``scene_label`` (alphabetical order of the label names).
    """
    # Parse the meta file by hand, reducing column 0 to the bare clip id
    # ("audio/x.wav" -> "x").
    with open(csv_path, 'r') as meta_file:
        rows = [line.split('\t') for line in meta_file.read().split('\n')]
    for row in rows:
        row[0] = row[0].split('/')[-1].split('.')[0]

    rows = [row for row in rows[1:] if row != ['']]  # drop header and blanks
    for row in rows:
        row[-1] = row[-1].split('\r')[0]  # strip stray CR from CRLF files
    label_info = np.array(rows)

    # Labels come from pandas so the category -> code mapping is stable.
    meta_df = pd.read_csv(csv_path, sep='\t', encoding='ASCII')
    labels = meta_df['scene_label'].astype('category').cat.codes.values

    # One pickle per clip; order matches the CSV rows.
    feature_list = []
    for clip_id, _ in label_info:
        with open(feat_path + '/' + clip_id + '.logmel', 'rb') as f:
            feature_list.append(pickle.load(f, encoding='latin1')['feat_data'])

    return np.array(feature_list), labels
-
def load_data_2020_splitted(feat_path, csv_path, idxlines):
    """Load features/labels for only a subset of the rows of ``csv_path``.

    Same contract as ``load_data_2020`` but only the data rows selected by
    ``idxlines`` (0-based indices into the rows AFTER the header) are loaded;
    the mixup generator uses this to stream the training set chunk by chunk.

    :param feat_path: directory of ``<clip_id>.logmel`` pickles
    :param csv_path: tab-separated meta file with a header row
    :param idxlines: iterable of row indices to load
    :return: (features ndarray, list of integer label codes)
    """
    with open(csv_path, 'r') as text_file:
        lines = text_file.read().split('\n')
        for idx, elem in enumerate(lines):
            lines[idx] = lines[idx].split('\t')
            # keep only the bare clip id ("audio/x.wav" -> "x")
            lines[idx][0] = lines[idx][0].split('/')[-1].split('.')[0]

    lines = lines[1:]                       # drop header
    lines = [lines[i] for i in idxlines]    # keep only the requested rows
    lines = [elem for elem in lines if elem != ['']]
    for idx, elem in enumerate(lines):
        lines[idx][-1] = lines[idx][-1].split('\r')[0]  # strip stray CR
    label_info = np.array(lines)

    # Codes are computed over the FULL csv so the label mapping matches
    # load_data_2020, then subset to the requested rows.
    data_df = pd.read_csv(csv_path, sep='\t', encoding='ASCII')
    labels = data_df['scene_label'].astype('category').cat.codes.values
    labels = [labels[i] for i in idxlines]

    feat_mtx = []
    for [filename, label] in label_info:
        filepath = feat_path + '/' + filename + '.logmel'
        with open(filepath, 'rb') as f:
            temp = pickle.load(f, encoding='latin1')
        feat_mtx.append(temp['feat_data'])

    feat_mtx = np.array(feat_mtx)
    return feat_mtx, labels
-
- # calculate_deltas
def deltas(X_in):
    """First-order delta of a (batch, freq, time, channel) array along time.

    Combines a 2-frame and a 4-frame central difference; the result loses two
    frames at each end of the time axis (axis 2).
    """
    narrow_diff = (X_in[:, :, 2:, :] - X_in[:, :, :-2, :]) / 10.0
    wide_diff = (X_in[:, :, 4:, :] - X_in[:, :, :-4, :]) / 5.0
    return narrow_diff[:, :, 1:-1, :] + wide_diff
-
-
# calculate_deltas
def calculate_deltas(LM_feature):
    """Append delta and delta-delta channels along the last axis.

    Each derivative order shrinks the time axis by 4 frames, so the static
    features are trimmed by 4 frames per side and the first-order deltas by
    2 per side to keep all three parts time-aligned.
    """
    first_order = deltas(LM_feature)
    second_order = deltas(first_order)
    return np.concatenate(
        (LM_feature[:, :, 4:-4, :], first_order[:, :, 2:-2, :], second_order),
        axis=-1,
    )
-
- #=========================================================================================================#
- extract_time = time.time()
- data_val, y_val = load_data_2020(feat_path, val_csv)
-
- # 用于 inference
- y_val_infer = y_val
-
- if delta:
- data_deltas_val = deltas(data_val)
- data_deltas_deltas_val = deltas(data_deltas_val)
- data_val = np.concatenate((data_val[:,:,4:-4,:], data_deltas_val[:,:,2:-2,:], data_deltas_deltas_val), axis=-1)
- y_val = tensorflow.keras.utils.to_categorical(y_val, num_classes)
-
- # 用于 inference
- y_val_onehot = y_val
-
- print(time.time() - extract_time)
-
- if delta:
- num_audio_channels *= 3
-
- #=========================================================================================================#
class LR_WarmRestart(tensorflow.keras.callbacks.Callback):
    """Cosine-annealing learning-rate schedule with warm restarts (SGDR-style).

    The LR decays from ``initial_lr`` to ``min_lr`` following half a cosine
    over each restart period; at every epoch listed in ``epochs_restart`` the
    schedule restarts and the period length ``Tmult`` doubles.
    """

    def __init__(self,nbatch,initial_lr,min_lr,epochs_restart,Tmult):
        # nbatch: batches per epoch, used to get a fractional epoch position
        self.initial_lr = initial_lr
        self.min_lr = min_lr
        self.epochs_restart = epochs_restart
        self.nbatch = nbatch
        self.currentEP=0   # current (adjusted) epoch counter
        self.startEP=0     # epoch at which the current cosine period started
        self.Tmult=Tmult   # current period length multiplier

    def on_epoch_begin(self, epoch, logs={}):
        # Before the first restart epoch the raw epoch index is used; after
        # that it is shifted by one so restarts land exactly on the listed
        # epochs. NOTE(review): this offset logic mirrors the original SGDR
        # reference implementation — confirm before changing.
        if epoch+1<self.epochs_restart[0]:
            self.currentEP = epoch
        else:
            self.currentEP = epoch+1

        if np.isin(self.currentEP,self.epochs_restart):
            # restart: new period begins here, twice as long as the last one
            self.startEP=self.currentEP
            self.Tmult=2*self.Tmult

    def on_epoch_end(self, epochs, logs={}):
        # Log the LR actually in effect at the end of the epoch.
        lr = K.get_value(self.model.optimizer.lr)
        print ('\nLearningRate:{:.6f}'.format(lr))

    def on_batch_begin(self, batch, logs={}):
        # Fractional progress within the current period, then cosine decay
        # between initial_lr and min_lr.
        pts = self.currentEP + batch/self.nbatch - self.startEP
        decay = 1+np.cos(pts/self.Tmult*np.pi)
        lr = self.min_lr+0.5*(self.initial_lr-self.min_lr)*decay
        K.set_value(self.model.optimizer.lr,lr)
-
-
class threadsafe_iter:
    """Wrap an iterator/generator so that advancing it is serialized with a
    lock, making it safe to share across worker threads (e.g. Keras
    ``workers > 1``)."""

    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        # Only one thread may advance the wrapped iterator at a time;
        # StopIteration propagates to the caller unchanged.
        with self.lock:
            return next(self.it)
-
def threadsafe_generator(f):
    """A decorator that takes a generator function and makes it thread-safe.

    The returned wrapper yields a ``threadsafe_iter`` around the generator so
    concurrent ``next()`` calls are serialized.
    """
    from functools import wraps

    @wraps(f)  # preserve the wrapped generator's name and docstring
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g
-
- # 时频域掩膜/遮蔽
- def frequency_masking(mel_spectrogram, frequency_masking_para=13, frequency_mask_num=1):
- fbank_size = mel_spectrogram.shape
-
- for i in range(frequency_mask_num):
- f = random.randrange(0, frequency_masking_para)
- f0 = random.randrange(0, fbank_size[0] - f)
-
- if (f0 == f0 + f):
- continue
-
- mel_spectrogram[f0:(f0+f),:] = 0
- return mel_spectrogram
-
def time_masking(mel_spectrogram, time_masking_para=40, time_mask_num=1):
    """Zero out random time spans of a 2-D (freq, time) spectrogram.

    Applied IN PLACE; the mutated array is also returned. Each of the
    ``time_mask_num`` masks covers ``width`` consecutive frames with
    ``width`` drawn uniformly from [0, time_masking_para).
    """
    num_frames = mel_spectrogram.shape[1]
    for _ in range(time_mask_num):
        width = random.randrange(0, time_masking_para)
        start = random.randrange(0, num_frames - width)
        if width == 0:
            # empty span: nothing to zero
            continue
        mel_spectrogram[:, start:start + width] = 0
    return mel_spectrogram
-
class MixupGenerator_splitted():
    '''
    Mixup + SpecAugment training-batch generator that streams the training
    set in ``splitted_num`` chunks so the whole set never has to fit in RAM.

    Each chunk is loaded from disk, delta/delta-delta channels are appended,
    and batches of ``batch_size`` are produced by mixing pairs of examples
    with Beta(alpha, alpha) weights after per-channel time/frequency masking
    (and optional random time cropping to ``crop_length`` frames).

    Reference: https://github.com/yu4u/mixup-generator
    '''

    def __init__(self, feat_path, train_csv, batch_size=32, alpha=0.2, shuffle=True, splitted_num=4, crop_length=400):
        self.feat_path = feat_path
        self.train_csv = train_csv
        self.batch_size = batch_size
        self.alpha = alpha          # Beta-distribution parameter for mixup
        self.shuffle = shuffle
        # Count data rows (header excluded). Fixed: the file handle was
        # previously opened without ever being closed.
        with open(train_csv, 'r') as f:
            self.sample_num = len(f.readlines()) - 1
        self.lock = threading.Lock()
        self.NewLength = crop_length    # 0 disables random cropping
        self.splitted_num = splitted_num

    def __iter__(self):
        return self

    @threadsafe_generator
    def __call__(self):
        with self.lock:
            while True:
                # one fresh (optionally shuffled) permutation per pass
                indexes = self.__get_exploration_order()

                # rows per chunk, rounded down to a multiple of batch_size
                item_num = self.sample_num // self.splitted_num - (self.sample_num // self.splitted_num) % self.batch_size

                for k in range(self.splitted_num):
                    cur_item_num = item_num
                    s = k * item_num
                    e = (k+1) * item_num
                    if k == self.splitted_num - 1:
                        # last chunk absorbs the remainder
                        cur_item_num = self.sample_num - (self.splitted_num - 1) * item_num
                        e = self.sample_num

                    lines = indexes[s:e]
                    X_train, y_train = load_data_2020_splitted(self.feat_path, self.train_csv, lines)
                    y_train = tensorflow.keras.utils.to_categorical(y_train, 10)
                    # append delta / delta-delta channels, time-aligned
                    X_deltas_train = deltas(X_train)
                    X_deltas_deltas_train = deltas(X_deltas_train)
                    X_train = np.concatenate((X_train[:,:,4:-4,:], X_deltas_train[:,:,2:-2,:], X_deltas_deltas_train), axis=-1)

                    # each mixup batch consumes 2*batch_size source examples
                    itr_num = int(cur_item_num // (self.batch_size * 2))

                    for i in range(itr_num):
                        batch_ids = np.arange(cur_item_num)[i*self.batch_size * 2:(i + 1) * self.batch_size * 2]
                        X, y = self.__data_generation(batch_ids, X_train, y_train)
                        yield X, y

    def __get_exploration_order(self):
        """Return the (optionally shuffled) index order for one epoch pass."""
        indexes = np.arange(self.sample_num)

        if self.shuffle:
            np.random.shuffle(indexes)

        return indexes

    def __data_generation(self, batch_ids, X_train, y_train):
        """Build one mixup batch from 2*batch_size chunk-local row indices."""
        _, h, w, c = X_train.shape
        # per-example mixing weights
        l = np.random.beta(self.alpha, self.alpha, self.batch_size)
        X_l = l.reshape(self.batch_size, 1, 1, 1)
        y_l = l.reshape(self.batch_size, 1)

        X1 = X_train[batch_ids[:self.batch_size]]
        X2 = X_train[batch_ids[self.batch_size:]]

        for j in range(X1.shape[0]):

            # time & frequency masking, independently per channel
            # (the loop variable reuses the name `c` from the unpack above)
            for c in range(X1.shape[3]):
                X1[j, :, :, c] = frequency_masking(X1[j, :, :, c])
                X1[j, :, :, c] = time_masking(X1[j, :, :, c])
                X2[j, :, :, c] = frequency_masking(X2[j, :, :, c])
                X2[j, :, :, c] = time_masking(X2[j, :, :, c])

            # random cropping: move a random window to the front...
            if self.NewLength > 0:
                StartLoc1 = np.random.randint(0,X1.shape[2]-self.NewLength)
                StartLoc2 = np.random.randint(0,X2.shape[2]-self.NewLength)

                X1[j,:,0:self.NewLength,:] = X1[j,:,StartLoc1:StartLoc1+self.NewLength,:]
                X2[j,:,0:self.NewLength,:] = X2[j,:,StartLoc2:StartLoc2+self.NewLength,:]

        # ...then truncate every example to the cropped length
        if self.NewLength > 0:
            X1 = X1[:,:,0:self.NewLength,:]
            X2 = X2[:,:,0:self.NewLength,:]

        # mixup of inputs
        X = X1 * X_l + X2 * (1.0 - X_l)

        # mixup of targets (list form supports multi-output models)
        if isinstance(y_train, list):
            y = []

            for y_train_ in y_train:
                y1 = y_train_[batch_ids[:self.batch_size]]
                y2 = y_train_[batch_ids[self.batch_size:]]
                y.append(y1 * y_l + y2 * (1.0 - y_l))
        else:
            y1 = y_train[batch_ids[:self.batch_size]]
            y2 = y_train[batch_ids[self.batch_size:]]
            y = y1 * y_l + y2 * (1.0 - y_l)

        return X, y
-
-
def categorical_focal_loss(gamma=2., alpha=.25):
    """
    Build a softmax focal-loss function for Keras.

    FL = -alpha * (1 - p_c)^gamma * y_c * log(p_c), summed over the m classes
    and averaged over axis 1 per sample.

    Reference: https://github.com/umbertogriffo/focal-loss-keras
    Official paper: https://arxiv.org/pdf/1708.02002.pdf

    Parameters:
        alpha -- class-balance weighting factor (as in balanced cross entropy)
        gamma -- focusing parameter for the (1 - p)^gamma modulating factor
    Default values are the 2.0 / 0.25 used in the paper.

    Usage:
        model.compile(loss=[categorical_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
    """
    # NOTE: the inner function's name is part of the public contract — it is
    # referenced by name in `custom_objects` when the model is reloaded.
    def categorical_focal_loss_fixed(y_true, y_pred):
        """
        :param y_true: one-hot tensor of the same shape as `y_pred`
        :param y_pred: tensor resulting from a softmax
        :return: per-sample focal-loss tensor
        """
        # Renormalize so each sample's class probabilities sum to 1.
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

        # Clip away exact 0/1 so log() stays finite.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)

        # Per-class cross entropy, then the focal modulation.
        xent = -y_true * K.log(y_pred)
        focal = alpha * K.pow(1 - y_pred, gamma) * xent

        # Mean over the class axis, as in the reference implementation.
        return K.mean(focal, axis=1)

    return categorical_focal_loss_fixed
-
- # 自定义 checkpoint
- class MetaCheckpoint(tensorflow.keras.callbacks.ModelCheckpoint):
- def __init__(self, filepath, monitor='val_loss', verbose=0,
- save_best_only=False, save_weights_only=False,
- mode='auto', save_freq='epoch', training_args=None, meta=None):
-
- super(MetaCheckpoint, self).__init__(filepath,
- monitor=monitor,
- verbose=verbose,
- save_best_only=save_best_only,
- save_weights_only=save_weights_only,
- mode=mode,
- save_freq=save_freq)
-
- self.filepath = filepath
- self.new_file_override = True
- self.meta = meta or {'epochs': [], self.monitor: []}
-
- if training_args:
- self.meta['training_args'] = training_args
-
- def on_train_begin(self, logs={}):
- if self.save_best_only:
- if 'accuracy' in self.monitor or self.monitor.startswith('fmeasure'):
- self.best = max(self.meta[self.monitor], default=-np.Inf)
- else:
- self.best = min(self.meta[self.monitor], default=np.Inf)
-
- super(MetaCheckpoint, self).on_train_begin(logs)
-
- def on_epoch_end(self, epoch, logs={}):
- # 只有在‘只保存’最优版本且生成新的.h5文件的情况下
- if self.save_best_only:
- current = logs.get(self.monitor)
- if self.monitor_op(current, self.best):
- self.new_file_override = True
- else:
- self.new_file_override = False
-
- super(MetaCheckpoint, self).on_epoch_end(epoch, logs)
-
- # Get statistics
- self.meta['epochs'].append(epoch)
- for k, v in logs.items():
- # Get default gets the value or sets (and gets) the default value
- self.meta.setdefault(k, []).append(v)
-
- # Save to file
- filepath = self.filepath.format(epoch=epoch, **logs)
-
- if self.new_file_override and self.epochs_since_last_save == 0:
- # 只有在‘只保存’最优版本且生成新的.h5文件的情况下 才会继续添加meta
- with h5py.File(filepath, 'r+') as f:
- meta_group = f.create_group('meta')
- meta_group.attrs['training_args'] = yaml.dump(
- self.meta.get('training_args', '{}'))
- meta_group.create_dataset('epochs', data=np.array(self.meta['epochs']))
- for k in logs:
- meta_group.create_dataset(k, data=np.array(self.meta[k]))
-
- # 实现断点续传功能
- def load_meta(model_fname):
- """
- Load meta configuration
- :param model_fname: model file name
- :return: meta info
- """
- meta = {}
-
- with h5py.File(model_fname, 'r') as f:
- meta_group = f['meta']
-
- meta['training_args'] = yaml.load(
- meta_group.attrs['training_args'])
- for k in meta_group.keys():
- meta[k] = list(meta_group[k])
-
- return meta
-
def get_last_status(model, savedir):
    """Resume support: if ``savedir/bestModel.h5`` exists, load its weights
    into ``model`` and return (last finished epoch, meta dict); otherwise
    return (-1, {})."""
    best_path = savedir + "/bestModel.h5"
    last_epoch, last_meta = -1, {}
    if os.path.exists(best_path):
        model.load_weights(best_path)
        last_meta = load_meta(best_path)
        last_epoch = last_meta.get('epochs')[-1]
    return last_epoch, last_meta
-
- #=========================================================================================================#
- #network definition
- def gca_block(x, b=1, gama=2):
-
- # 根据公式计算自适应卷积核大小
- kernel_size = int(abs((math.log(x.shape[-1], 2) + b) / gama))
- if kernel_size % 2:
- kernel_size = kernel_size
- else:
- kernel_size = kernel_size + 1
-
- x_shape = K.int_shape(x)
- [b, h, w, c] = x_shape
-
- x_h = AveragePooling2D(pool_size=(1, w), strides=1, padding='valid')(x)
- x_w = AveragePooling2D(pool_size=(h, 1), strides=1, padding='valid')(x)
- x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- y = tensorflow.concat([x_h, x_w], axis=1)
-
- y = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(y)
- y = Activation('gelu')(y)
-
- x_h, x_w = tensorflow.split(y, num_or_size_splits=[h, w], axis=1)
- x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- a_h = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(x_h)
- a_h = Activation('sigmoid')(a_h)
- a_w = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(x_w)
- a_w = Activation('sigmoid')(a_w)
-
- out = x * a_h * a_w
- return out
-
- # def eca_block(inputs, b=1, gama=2):
-
- # in_channel = inputs.shape[-1]
- # kernel_size = int(abs((math.log(in_channel, 2) + b) / gama)) # 根据公式计算自适应卷积核大小
- # if kernel_size % 2:
- # kernel_size = kernel_size
- # else:
- # kernel_size = kernel_size + 1
-
- # # [h,w,c]==>[None,c] 全局平均池化
- # x = GlobalAveragePooling2D()(inputs)
- # # [None,c]==>[c,1]
- # x = Reshape(target_shape=(in_channel, 1))(x)
- # # [c,1]==>[c,1]
- # x = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(x)
- # # sigmoid激活
- # x = Activation('sigmoid')(x)
- # # [c,1]==>[1,1,c]
- # x = Reshape((1,1,in_channel))(x)
-
- # outputs = multiply([inputs, x])
- # return outputs
-
- # def coordinate_attention(x, reduction_ratio=8):
-
- # def h_swish(x):
- # tmpx = tensorflow.nn.relu6(x+3) / 6
- # x = x * tmpx
- # return x
-
- # x_shape = K.int_shape(x)
- # [b, h, w, c] = x_shape
-
- # x_h = AveragePooling2D(pool_size=(1, w), strides=1, padding='valid')(x)
- # x_w = AveragePooling2D(pool_size=(h, 1), strides=1, padding='valid')(x)
- # x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- # y = tensorflow.concat([x_h, x_w], axis=1)
-
- # mip = max(8, c // reduction_ratio)
- # y = Conv2D(mip, (1, 1), strides=1, padding='valid')(y)
-
- # y = BatchNormalization(axis=-1)(y)
- # y = h_swish(y)
-
- # x_h, x_w = tensorflow.split(y, num_or_size_splits=[h, w], axis=1)
- # x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- # a_h = Conv2D(c, (1, 1), strides=1, padding='same')(x_h)
- # a_h = Activation('sigmoid')(a_h)
- # a_w = Conv2D(c, (1, 1), strides=1, padding='same')(x_w)
- # a_w = Activation('sigmoid')(a_w)
-
- # out = x * a_h * a_w
- # return out
-
- # def squeeze_and_excite(inputs, ratio=4):
- # squeeze = inputs.shape[-1] // ratio # 第一个FC降低通道数个数
- # excitation = inputs.shape[-1] # 第二个FC上升通道数个数
-
- # x = GlobalAveragePooling2D()(inputs)
- # x = Dense(squeeze)(x)
- # x = Activation('relu')(x)
- # x = Dense(excitation)(x)
- # x = Activation('sigmoid')(x)
-
- # x = multiply([inputs, x])
- # return x
-
def ghost_conv(inputs, out_channels, use_relu=True):
    """GhostNet-style "ghost" convolution producing ``out_channels`` maps.

    Half the channels come from a cheap 1x1 conv; the other half are "ghost"
    features generated from them by a 3x3 depthwise conv; the two halves are
    concatenated. ``use_relu`` toggles the activations after each half.
    """

    # primary features: pointwise conv + BN (+ activation)
    x1 = Conv2D(out_channels // 2, kernel_size=(1,1), strides=1,
                padding="same", use_bias=False)(inputs)
    x1 = BatchNormalization()(x1)
    if use_relu:
        # NOTE(review): the first half uses 'gelu' while the second half
        # below uses 'relu' despite the single `use_relu` flag — possibly
        # intentional, but worth confirming against the original design.
        x1 = Activation('gelu')(x1)
    # ghost features: depthwise 3x3 conv over the primary half
    x2 = Conv2D(out_channels // 2, kernel_size=(3, 3), strides=1,
                padding="same", use_bias=False, groups=out_channels // 2)(x1)
    x2 = BatchNormalization()(x2)
    if use_relu:
        x2 = Activation('relu')(x2)
    return concatenate([x1, x2], axis=-1)
-
def resnet_layer(inputs,num_filters=16,kernel_size=3,strides=1,learn_bn = True,wd=1e-4, use_relu=True):
    """Standard ResNet building block: Conv2D -> BatchNorm -> optional ReLU.

    ``learn_bn`` controls whether the BN scale/center parameters are learned;
    ``wd`` is the L2 weight-decay factor on the conv kernel.
    """
    y = Conv2D(num_filters,
               kernel_size=kernel_size,
               strides=strides,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(wd),
               use_bias=False)(inputs)
    y = BatchNormalization(center=learn_bn, scale=learn_bn)(y)
    if use_relu:
        y = Activation('relu')(y)
    return y
-
def pad_depth(inputs, desired_channels):
    # NOTE(review): ``desired_channels`` is never used — the function simply
    # returns an all-zeros tensor shaped like ``inputs``. Presumably intended
    # as a zero-padding helper for shortcut branches; no caller is visible in
    # this file, so confirm before relying on it.
    from keras import backend as K
    y = K.zeros_like(inputs, name='pad_depth1')
    return y
-
def My_freq_split1(x):
    """Return the lower half of the frequency axis (bins 0-63) of an NHWC
    tensor/array shaped (batch, 128, time, channels).

    The unused local ``from keras import backend as K`` import was removed.
    """
    return x[:, 0:64, :, :]
-
def My_freq_split2(x):
    """Return the upper half of the frequency axis (bins 64-127) of an NHWC
    tensor/array shaped (batch, 128, time, channels).

    The unused local ``from keras import backend as K`` import was removed.
    """
    return x[:, 64:128, :, :]
-
def RG_bneck(inputs, num_filters, My_wd, num_stacks):
    """Residual-GhostNet backbone: a two-branch "early fusion" stem followed
    by ``num_stacks`` stacks of two ghost-conv residual bottleneck blocks.

    Each stack after the first downsamples by stride 2 and the filter count
    doubles per stack. Returns the final feature map.
    """

    num_res_blocks=2

    #===== early fusion: two stems with different strides, each followed by
    # coordinate attention, concatenated along the time axis =====
    ResidualPath1 = resnet_layer(inputs=inputs,
                                 num_filters=num_filters,
                                 strides=2,
                                 learn_bn = True,
                                 wd=My_wd,
                                 use_relu = False)
    ResidualPath1 = gca_block(ResidualPath1)

    ResidualPath2 = resnet_layer(inputs=inputs,
                                 num_filters=num_filters,
                                 strides=[2,1],
                                 learn_bn = True,
                                 wd=My_wd,
                                 use_relu = False)
    ResidualPath2 = gca_block(ResidualPath2)
    ResidualPath = tensorflow.concat([ResidualPath1, ResidualPath2], axis=2)
    #========================

    # Single-stem alternative kept for reference:
    # ResidualPath = resnet_layer(inputs=inputs,
    #                             num_filters=num_filters,
    #                             strides=2,
    #                             learn_bn = True,
    #                             wd=My_wd,
    #                             use_relu = False)

    # Instantiate the stack of residual units
    for stack in range(num_stacks):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample

            if strides != 1:
                # Downsampling block: expand channels 4x with a ghost conv...
                ConvPath = ghost_conv(inputs=ResidualPath, out_channels=ResidualPath.shape[-1]*4)
                # ...depthwise 3x3 conv performs the stride-2 downsampling...
                ConvPath = Conv2D(ConvPath.shape[-1], kernel_size=(3, 3), strides=strides,
                                  padding="same", use_bias=False, groups=ConvPath.shape[-1])(ConvPath)
                ConvPath = BatchNormalization()(ConvPath)
                # ...project back down WITHOUT a nonlinearity (an activation
                # here would destroy information in the bottleneck).
                ConvPath = ghost_conv(inputs=ConvPath, out_channels=num_filters, use_relu=False)

            else:
                # Identity-resolution block: expand then project, plus the
                # residual sum with the incoming path.
                ConvPath = ghost_conv(inputs=ResidualPath, out_channels=ResidualPath.shape[-1]*4)
                ConvPath = ghost_conv(inputs=ConvPath, out_channels=num_filters, use_relu=False)
                ConvPath = tensorflow.keras.layers.add([ConvPath,ResidualPath])

            if stack > 0 and res_block == 0:
                # Match the shortcut to the new resolution/width: depthwise
                # stride-2 conv then a pointwise projection to num_filters.
                ResidualPath = Conv2D(ResidualPath.shape[-1], kernel_size=(3, 3), strides=strides,
                                      padding="same", use_bias=False, groups=ResidualPath.shape[-1])(ResidualPath)
                ResidualPath = BatchNormalization()(ResidualPath)
                ResidualPath = Conv2D(num_filters, kernel_size=(1,1), strides=1,
                                      padding="same", use_bias=False)(ResidualPath)
                ResidualPath = BatchNormalization()(ResidualPath)

                ResidualPath = tensorflow.keras.layers.add([ConvPath,ResidualPath])

        #when we are here, we double the number of filters
        num_filters *= 2

    return ResidualPath
-
-
def RGhostnet(num_classes,input_shape =[128,None,6], num_filters =24,
              wd=1e-3, num_stacks=4, output_num_filters_factor=1, use_split=False):
    """Build the R-GhostNet classifier.

    When ``use_split`` is True the 128-bin frequency axis is split into two
    64-bin branches, each run through its own RG_bneck backbone, then
    concatenated; otherwise a single backbone is used. The head is two 1x1
    conv "resnet_layer"s, and the output is the average of two softmaxed
    global-average-pooled halves of the time axis ("late fusion").
    """

    My_wd = wd  # this is 5e-3 in matlab, so quite large

    inputs = Input(shape=input_shape)
    x = inputs

    if use_split:
        #split up frequency into two branches (bins 0-63 and 64-127)
        Split1= Lambda(My_freq_split1)(x)
        Split2= Lambda(My_freq_split2)(x)

        ResidualPath1 = RG_bneck(Split1, num_filters, My_wd, num_stacks)
        ResidualPath2 = RG_bneck(Split2, num_filters, My_wd, num_stacks)
        ResidualPath = concatenate([ResidualPath1,ResidualPath2],axis=1)
    else:
        ResidualPath = RG_bneck(x, num_filters, My_wd, num_stacks)

    # RG_bneck doubled its internal filter count once per stack; replay the
    # doubling here so the head width matches the backbone output.
    for _ in range(num_stacks):
        num_filters *= 2

    # 1x1 conv head (wide), with BN affine params disabled.
    OutputPath = resnet_layer(inputs=ResidualPath,
                              num_filters=num_filters*output_num_filters_factor,
                              kernel_size=1,
                              strides=1,
                              learn_bn = False,
                              wd=My_wd,
                              use_relu = True)

    OutputPath = Dropout(0.3)(OutputPath)
    # OutputPath = gca_block(OutputPath)

    #output layers after last sum: 1x1 conv down to num_classes maps
    OutputPath = resnet_layer(inputs=OutputPath,
                              num_filters=num_classes,
                              strides = 1,
                              kernel_size=1,
                              learn_bn = False,
                              wd=My_wd,
                              use_relu=False)
    OutputPath = BatchNormalization(center=False, scale=False)(OutputPath)
    # OutputPath = GlobalAveragePooling2D()(OutputPath)
    # OutputPath = Activation('softmax')(OutputPath)

    #===== late fusion: split the time axis in half (odd widths give the
    # first half the extra column), softmax each half separately, average =====
    OutputPathShape = K.int_shape(OutputPath)
    if OutputPathShape[2]%2 == 1:
        tmp = int(OutputPathShape[2]/2)
        OutputPath1, OutputPath2 = tensorflow.split(OutputPath, num_or_size_splits=[tmp+1, tmp], axis=2)
    else:
        OutputPath1, OutputPath2 = tensorflow.split(OutputPath, num_or_size_splits=2, axis=2)

    # OutputPath1 = gca_block(OutputPath1)
    OutputPath1 = GlobalAveragePooling2D()(OutputPath1)
    OutputPath1 = Activation('softmax')(OutputPath1)

    OutputPath2 = GlobalAveragePooling2D()(OutputPath2)
    OutputPath2 = Activation('softmax')(OutputPath2)

    OutputPath = 0.5*OutputPath1 + 0.5*OutputPath2
    #========================

    # Instantiate model.
    model = Model(inputs=inputs, outputs=OutputPath)
    return model
-
###############################################
#create and compile the model

if model_selection == 0: # R-GhostNet
    model = RGhostnet(num_classes,
                      input_shape =[num_freq_bin,num_time_bin,num_audio_channels],
                      num_filters = num_filters,
                      wd=wd,
                      num_stacks = num_stacks,
                      output_num_filters_factor = output_num_filters_factor,
                      use_split = use_split)

elif model_selection == 1: # network under test (not implemented)
    # NOTE(review): placeholder — selecting 1 would make model.summary()
    # below fail since `model` is an int.
    model = 1


model.summary()

# NOTE(review): SGD(lr=..., decay=...) is legacy-Keras argument naming;
# newer tf.keras optimizers use `learning_rate`.
if focal_loss:
    model.compile(loss=[categorical_focal_loss(gamma=gamma, alpha=alpha)],
                  optimizer =SGD(lr=max_lr,decay=0, momentum=0.9, nesterov=False),
                  metrics=['accuracy'])
else:
    model.compile(loss='categorical_crossentropy',
                  optimizer =SGD(lr=max_lr,decay=0, momentum=0.9, nesterov=False),
                  metrics=['accuracy'])

#=========================================================================================================#
# Resume-from-checkpoint bookkeeping (disabled; uncomment to resume).
# last_epoch, last_meta = get_last_status(model, savedir)
last_epoch = -1
last_meta = {}

#set learning rate schedule (cosine annealing with warm restarts)
lr_scheduler = LR_WarmRestart(nbatch=np.ceil(sample_num/batch_size),Tmult=2,
                              initial_lr=max_lr, min_lr=max_lr*1e-4,
                              epochs_restart = [3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0])

# checkpoint callback: saves the best model by val_accuracy plus meta info
ckpt_path=savedir + '/bestModel.h5' # where the checkpoint is written
cp_callback = MetaCheckpoint(filepath=ckpt_path,
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             monitor='val_accuracy',
                             mode='max',
                             meta=last_meta)

callbacks = [lr_scheduler,cp_callback]


#create data generator (mixup + SpecAugment, streamed in 20 chunks)
TrainDataGen = MixupGenerator_splitted(feat_path,
                                       train_csv,
                                       batch_size=batch_size,
                                       alpha=mixup_alpha,
                                       splitted_num=20,
                                       crop_length=crop_length)()

#train the model
history = model.fit(TrainDataGen,
                    validation_data=(data_val, y_val),
                    epochs=num_epochs,
                    verbose=2,
                    workers=8,
                    shuffle=True,
                    # workers=1, # use for fixed-seed / deterministic runs
                    # shuffle=False, # use for fixed-seed / deterministic runs
                    max_queue_size = 100,
                    callbacks=callbacks,
                    steps_per_epoch=np.ceil(sample_num/batch_size),
                    initial_epoch=last_epoch+1)


# # inspect the saved meta history
# last_meta = load_meta(savedir + "/bestModel.h5")
# print(last_meta.get('val_accuracy'))
-
#=================================== inference ============================================
dev_test_df = pd.read_csv(val_csv,sep='\t', encoding='ASCII')
wav_paths = dev_test_df['filename'].tolist()
ClassNames = np.unique(dev_test_df['scene_label'])

# Reduce each wav path to the token after the last '-' in the stem —
# presumably the recording-device id in DCASE-style filenames
# ("scene-location-...-device.wav"); confirm against the dataset naming.
for idx, elem in enumerate(wav_paths):
    wav_paths[idx] = wav_paths[idx].split('/')[-1].split('.')[0]
    wav_paths[idx] = wav_paths[idx].split('-')[-1]

device_idxs = wav_paths
device_list = np.unique(device_idxs)
print(device_list)

# Reload the best checkpoint; the focal loss must be re-registered by the
# inner function's name when it was used for training.
if focal_loss:
    best_model = keras.models.load_model(model_path, custom_objects={'categorical_focal_loss_fixed': categorical_focal_loss(gamma=1.0, alpha=0.3)})
else:
    best_model = keras.models.load_model(model_path)

preds = best_model.predict(data_val)
y_pred_val = np.argmax(preds,axis=1)

# Overall metrics over the whole validation split.
over_loss = log_loss(y_val_onehot, preds)
overall_acc = np.sum(y_pred_val==y_val_infer) / data_val.shape[0]

print(y_val_onehot.shape, preds.shape)
np.set_printoptions(precision=3)

print("\n\nVal acc: ", "{0:.5f}".format(overall_acc))
print("Val log loss:", "{0:.5f}".format(over_loss))

# Per-class accuracy broken down by recording device.
class_device_acc = {}
for class_name in ClassNames:
    class_device_acc[class_name] = []
    for device_id in device_list:
        cur_y_pred_val = np.array([y_pred_val[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id and y_val_infer[i] == ClassNames.tolist().index(class_name)])
        cur_y_val = [y_val_infer[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id and y_val_infer[i] == ClassNames.tolist().index(class_name)]
        if len(cur_y_pred_val) > 0:
            cur_acc = np.sum(cur_y_pred_val==cur_y_val) / len(cur_y_pred_val)
            class_device_acc[class_name].append(cur_acc)
        else:
            # no samples of this class for this device
            class_device_acc[class_name].append(0)

print("\nPer-class device val acc: ")
for class_name in ClassNames:
    print(["{:.3f}".format(num) for num in class_device_acc[class_name]])

# Per-device accuracy and log loss.
device_acc = []
device_loss = []
for device_id in device_list:
    cur_preds = np.array([preds[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id])
    cur_y_pred_val = np.argmax(cur_preds,axis=1)
    cur_y_val_onehot = np.array([y_val_onehot[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id])
    cur_y_val = [y_val_infer[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id]
    cur_loss = log_loss(cur_y_val_onehot, cur_preds)
    cur_acc = np.sum(cur_y_pred_val==cur_y_val) / len(cur_preds)

    device_acc.append(cur_acc)
    device_loss.append(cur_loss)

# NOTE(review): the grouped prints below assume devices sort as
# [A, B, C, s1..s6] — i.e. at least 9 devices in np.unique order.
print("\n\nDevices list: ", device_list)
print("Per-device val acc : ", np.array(device_acc))
print("Device A acc: ", "{0:.3f}".format(device_acc[0]))
print("Device B & C acc: ", "{0:.3f}".format((device_acc[1] + device_acc[2]) / 2))
print("Device s1 & s2 & s3 acc: ", "{0:.3f}".format((device_acc[3] + device_acc[4] + device_acc[5]) / 3))
print("Device s4 & s5 & s6 acc: ", "{0:.3f}".format((device_acc[6] + device_acc[7] + device_acc[8]) / 3))


# get confusion matrix; the diagonal of the row-normalized matrix is the
# per-class recall (== per-class accuracy).
conf_matrix = confusion_matrix(y_val_infer,y_pred_val)
print("\n\nConfusion matrix:")
# print(conf_matrix)
conf_matrix_a = np.array(conf_matrix)
conf_matrix_comma = np.array2string(conf_matrix_a, separator=', ')
print(conf_matrix_comma)
conf_mat_norm_recall = conf_matrix.astype('float32')/conf_matrix.sum(axis=1)[:,np.newaxis]
recall_by_class = np.diagonal(conf_mat_norm_recall)
mean_recall = np.mean(recall_by_class)

print("Class names:", ClassNames)
print("Per-class val acc: ",recall_by_class, "\n\n")
|