|
- # 实现 splitted 训练 并输出结果
-
- # 固定 seed 值(只能保证误差较小)===================== 不使用固定seed可以大大加快训练速度,但最终结果会有3%左右的波动
- # model.fit() 中的 shuffle=False,worker=1
-
- # SEED = 1 # 固定随机值
- import os
- # os.environ['PYTHONHASHSEED'] = str(SEED) # 固定随机值
-
- import random
- # random.seed(SEED) # 固定随机值
-
- import tensorflow
- # tensorflow.random.set_seed(SEED) # 固定随机值
-
- import numpy as np
- # np.random.seed(SEED) # 固定随机值
- # os.environ['TF_DETERMINISTIC_OPS'] = '1' # 固定随机值(需要安装 tensorflow-determinism 才有效)
- #===================================
-
- import keras
- import time
- import h5py
- import yaml
- import pickle
- import pandas as pd
-
- os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-
- from keras import backend as K
- import threading
-
- # 用于 inference
- import pandas as pd
- from sklearn.metrics import log_loss
- from sklearn.metrics import confusion_matrix
-
- from tensorflow.keras.layers import Dense, Dropout, multiply, Reshape, Conv1D
- import math
-
- from tensorflow.keras.optimizers import SGD
- from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, GlobalAveragePooling2D
- from tensorflow.keras.layers import AveragePooling2D, Input, concatenate, Lambda
- from tensorflow.keras.regularizers import l2
- from tensorflow.keras.models import Model
-
- print("tensorflow version = ",tensorflow.__version__)
-
#=========================================================================================================#
# Dataset locations (DCASE-style tab-separated meta CSVs).
data_path = '/dataset'
train_csv = data_path + '/train_all.csv'
val_csv = data_path + '/fold1_evaluate.csv'

# Directory holding the pre-extracted log-mel feature pickles.
feat_path = '/dataset/features'

# Directory where model .h5 checkpoints are saved.
savedir = '/model/Chen'
if not os.path.exists(savedir):
    os.makedirs(savedir)

# Path of the saved best-model checkpoint (read back for inference).
model_path = savedir + "/bestModel.h5"

sr = 44100 # audio sample rate in Hz (44.1 kHz; the original comment said "48KHz")
num_audio_channels = 1 # mono input
duration = 10 # clip length in seconds

#log-mel spectrogram parameters
num_freq_bin = 128 # number of mel filters -> 128 frequency bins
num_fft = 2048 # STFT size / window length
hop_length = int(num_fft/2) # hop of 1024 samples between frames
num_time_bin = 423 # number of time frames: int(np.ceil(duration * sr / hop_length))

#training parameters
num_classes = 10
max_lr = 0.1
batch_size = 32
num_epochs = 64 #TODO
mixup_alpha = 0.4
# NOTE(review): this file handle is never closed; harmless for a one-shot
# script but a context manager would be cleaner.
sample_num = len(open(train_csv, 'r').readlines()) - 1

crop_length = 0 # time-axis length after random cropping (0 disables cropping)

num_filters = 32
wd = 1e-3
output_num_filters_factor = 1
num_stacks = 4 # number of residual stacks

delta = True # compute delta/delta-delta feature channels (needs > 16 GB RAM)
focal_loss = False # whether to train with focal loss instead of categorical CE
gamma=1.0
alpha=0.3
use_split = False # whether to use the two-branch (split-frequency) model
model_selection = 0
# '''
# 0) R-GhostNet
# 1) TODO
# '''
-
- #=========================================================================================================#
- # 加载 logmel 文件,获取特征和标签
def load_data_2020(feat_path, csv_path):
    """Load pickled log-mel features and integer class labels for a meta CSV.

    Parameters
    ----------
    feat_path : directory containing one ``<clip_id>.logmel`` pickle per clip,
        each holding a dict with a ``'feat_data'`` array.
    csv_path : tab-separated meta file; the header row is skipped, column 0 is
        a wav path and the ``scene_label`` column holds the class name.

    Returns
    -------
    (features, labels) : ``features`` stacks every clip's ``feat_data`` into
        one ndarray (CSV row order); ``labels`` are pandas category codes of
        ``scene_label`` (alphabetical order of the label names).
    """
    # Parse the meta file by hand, reducing column 0 to the bare clip id
    # ("audio/x.wav" -> "x").
    with open(csv_path, 'r') as meta_file:
        rows = [line.split('\t') for line in meta_file.read().split('\n')]
    for row in rows:
        row[0] = row[0].split('/')[-1].split('.')[0]

    rows = [row for row in rows[1:] if row != ['']]  # drop header and blanks
    for row in rows:
        row[-1] = row[-1].split('\r')[0]  # strip stray CR from CRLF files
    label_info = np.array(rows)

    # Labels come from pandas so the category -> code mapping is stable.
    meta_df = pd.read_csv(csv_path, sep='\t', encoding='ASCII')
    labels = meta_df['scene_label'].astype('category').cat.codes.values

    # One pickle per clip; order matches the CSV rows.
    feature_list = []
    for clip_id, _ in label_info:
        with open(feat_path + '/' + clip_id + '.logmel', 'rb') as f:
            feature_list.append(pickle.load(f, encoding='latin1')['feat_data'])

    return np.array(feature_list), labels
-
def load_data_2020_splitted(feat_path, csv_path, idxlines):
    """Load features/labels for only a subset of the rows of ``csv_path``.

    Same contract as ``load_data_2020`` but only the data rows selected by
    ``idxlines`` (0-based indices into the rows AFTER the header) are loaded;
    the mixup generator uses this to stream the training set chunk by chunk.

    :param feat_path: directory of ``<clip_id>.logmel`` pickles
    :param csv_path: tab-separated meta file with a header row
    :param idxlines: iterable of row indices to load
    :return: (features ndarray, list of integer label codes)
    """
    with open(csv_path, 'r') as text_file:
        lines = text_file.read().split('\n')
        for idx, elem in enumerate(lines):
            lines[idx] = lines[idx].split('\t')
            # keep only the bare clip id ("audio/x.wav" -> "x")
            lines[idx][0] = lines[idx][0].split('/')[-1].split('.')[0]

    lines = lines[1:]                       # drop header
    lines = [lines[i] for i in idxlines]    # keep only the requested rows
    lines = [elem for elem in lines if elem != ['']]
    for idx, elem in enumerate(lines):
        lines[idx][-1] = lines[idx][-1].split('\r')[0]  # strip stray CR
    label_info = np.array(lines)

    # Codes are computed over the FULL csv so the label mapping matches
    # load_data_2020, then subset to the requested rows.
    data_df = pd.read_csv(csv_path, sep='\t', encoding='ASCII')
    labels = data_df['scene_label'].astype('category').cat.codes.values
    labels = [labels[i] for i in idxlines]

    feat_mtx = []
    for [filename, label] in label_info:
        filepath = feat_path + '/' + filename + '.logmel'
        with open(filepath, 'rb') as f:
            temp = pickle.load(f, encoding='latin1')
        feat_mtx.append(temp['feat_data'])

    feat_mtx = np.array(feat_mtx)
    return feat_mtx, labels
-
- # calculate_deltas
def deltas(X_in):
    """First-order delta of a (batch, freq, time, channel) array along time.

    Combines a 2-frame and a 4-frame central difference; the result loses two
    frames at each end of the time axis (axis 2).
    """
    narrow_diff = (X_in[:, :, 2:, :] - X_in[:, :, :-2, :]) / 10.0
    wide_diff = (X_in[:, :, 4:, :] - X_in[:, :, :-4, :]) / 5.0
    return narrow_diff[:, :, 1:-1, :] + wide_diff
-
-
# calculate_deltas
def calculate_deltas(LM_feature):
    """Append delta and delta-delta channels along the last axis.

    Each derivative order shrinks the time axis by 4 frames, so the static
    features are trimmed by 4 frames per side and the first-order deltas by
    2 per side to keep all three parts time-aligned.
    """
    first_order = deltas(LM_feature)
    second_order = deltas(first_order)
    return np.concatenate(
        (LM_feature[:, :, 4:-4, :], first_order[:, :, 2:-2, :], second_order),
        axis=-1,
    )
-
- #=========================================================================================================#
- extract_time = time.time()
- data_val, y_val = load_data_2020(feat_path, val_csv)
-
- # 用于 inference
- y_val_infer = y_val
-
- if delta:
- data_deltas_val = deltas(data_val)
- data_deltas_deltas_val = deltas(data_deltas_val)
- data_val = np.concatenate((data_val[:,:,4:-4,:], data_deltas_val[:,:,2:-2,:], data_deltas_deltas_val), axis=-1)
- y_val = tensorflow.keras.utils.to_categorical(y_val, num_classes)
-
- # 用于 inference
- y_val_onehot = y_val
-
- print(time.time() - extract_time)
-
- if delta:
- num_audio_channels *= 3
-
- #=========================================================================================================#
class LR_WarmRestart(tensorflow.keras.callbacks.Callback):
    """Cosine-annealing learning-rate schedule with warm restarts (SGDR-style).

    The LR decays from ``initial_lr`` to ``min_lr`` following half a cosine
    over each restart period; at every epoch listed in ``epochs_restart`` the
    schedule restarts and the period length ``Tmult`` doubles.
    """

    def __init__(self,nbatch,initial_lr,min_lr,epochs_restart,Tmult):
        # nbatch: batches per epoch, used to get a fractional epoch position
        self.initial_lr = initial_lr
        self.min_lr = min_lr
        self.epochs_restart = epochs_restart
        self.nbatch = nbatch
        self.currentEP=0   # current (adjusted) epoch counter
        self.startEP=0     # epoch at which the current cosine period started
        self.Tmult=Tmult   # current period length multiplier

    def on_epoch_begin(self, epoch, logs={}):
        # Before the first restart epoch the raw epoch index is used; after
        # that it is shifted by one so restarts land exactly on the listed
        # epochs. NOTE(review): this offset logic mirrors the original SGDR
        # reference implementation — confirm before changing.
        if epoch+1<self.epochs_restart[0]:
            self.currentEP = epoch
        else:
            self.currentEP = epoch+1

        if np.isin(self.currentEP,self.epochs_restart):
            # restart: new period begins here, twice as long as the last one
            self.startEP=self.currentEP
            self.Tmult=2*self.Tmult

    def on_epoch_end(self, epochs, logs={}):
        # Log the LR actually in effect at the end of the epoch.
        lr = K.get_value(self.model.optimizer.lr)
        print ('\nLearningRate:{:.6f}'.format(lr))

    def on_batch_begin(self, batch, logs={}):
        # Fractional progress within the current period, then cosine decay
        # between initial_lr and min_lr.
        pts = self.currentEP + batch/self.nbatch - self.startEP
        decay = 1+np.cos(pts/self.Tmult*np.pi)
        lr = self.min_lr+0.5*(self.initial_lr-self.min_lr)*decay
        K.set_value(self.model.optimizer.lr,lr)
-
-
class threadsafe_iter:
    """Wrap an iterator/generator so that advancing it is serialized with a
    lock, making it safe to share across worker threads (e.g. Keras
    ``workers > 1``)."""

    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        # Only one thread may advance the wrapped iterator at a time;
        # StopIteration propagates to the caller unchanged.
        with self.lock:
            return next(self.it)
-
def threadsafe_generator(f):
    """A decorator that takes a generator function and makes it thread-safe.

    The returned wrapper yields a ``threadsafe_iter`` around the generator so
    concurrent ``next()`` calls are serialized.
    """
    from functools import wraps

    @wraps(f)  # preserve the wrapped generator's name and docstring
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g
-
- # 时频域掩膜/遮蔽
- def frequency_masking(mel_spectrogram, frequency_masking_para=13, frequency_mask_num=1):
- fbank_size = mel_spectrogram.shape
-
- for i in range(frequency_mask_num):
- f = random.randrange(0, frequency_masking_para)
- f0 = random.randrange(0, fbank_size[0] - f)
-
- if (f0 == f0 + f):
- continue
-
- mel_spectrogram[f0:(f0+f),:] = 0
- return mel_spectrogram
-
def time_masking(mel_spectrogram, time_masking_para=40, time_mask_num=1):
    """Zero out random time spans of a 2-D (freq, time) spectrogram.

    Applied IN PLACE; the mutated array is also returned. Each of the
    ``time_mask_num`` masks covers ``width`` consecutive frames with
    ``width`` drawn uniformly from [0, time_masking_para).
    """
    num_frames = mel_spectrogram.shape[1]
    for _ in range(time_mask_num):
        width = random.randrange(0, time_masking_para)
        start = random.randrange(0, num_frames - width)
        if width == 0:
            # empty span: nothing to zero
            continue
        mel_spectrogram[:, start:start + width] = 0
    return mel_spectrogram
-
class MixupGenerator_splitted():
    '''
    Mixup + SpecAugment training-batch generator that streams the training
    set in ``splitted_num`` chunks so the whole set never has to fit in RAM.

    Each chunk is loaded from disk, delta/delta-delta channels are appended,
    and batches of ``batch_size`` are produced by mixing pairs of examples
    with Beta(alpha, alpha) weights after per-channel time/frequency masking
    (and optional random time cropping to ``crop_length`` frames).

    Reference: https://github.com/yu4u/mixup-generator
    '''

    def __init__(self, feat_path, train_csv, batch_size=32, alpha=0.2, shuffle=True, splitted_num=4, crop_length=400):
        self.feat_path = feat_path
        self.train_csv = train_csv
        self.batch_size = batch_size
        self.alpha = alpha          # Beta-distribution parameter for mixup
        self.shuffle = shuffle
        # Count data rows (header excluded). Fixed: the file handle was
        # previously opened without ever being closed.
        with open(train_csv, 'r') as f:
            self.sample_num = len(f.readlines()) - 1
        self.lock = threading.Lock()
        self.NewLength = crop_length    # 0 disables random cropping
        self.splitted_num = splitted_num

    def __iter__(self):
        return self

    @threadsafe_generator
    def __call__(self):
        with self.lock:
            while True:
                # one fresh (optionally shuffled) permutation per pass
                indexes = self.__get_exploration_order()

                # rows per chunk, rounded down to a multiple of batch_size
                item_num = self.sample_num // self.splitted_num - (self.sample_num // self.splitted_num) % self.batch_size

                for k in range(self.splitted_num):
                    cur_item_num = item_num
                    s = k * item_num
                    e = (k+1) * item_num
                    if k == self.splitted_num - 1:
                        # last chunk absorbs the remainder
                        cur_item_num = self.sample_num - (self.splitted_num - 1) * item_num
                        e = self.sample_num

                    lines = indexes[s:e]
                    X_train, y_train = load_data_2020_splitted(self.feat_path, self.train_csv, lines)
                    y_train = tensorflow.keras.utils.to_categorical(y_train, 10)
                    # append delta / delta-delta channels, time-aligned
                    X_deltas_train = deltas(X_train)
                    X_deltas_deltas_train = deltas(X_deltas_train)
                    X_train = np.concatenate((X_train[:,:,4:-4,:], X_deltas_train[:,:,2:-2,:], X_deltas_deltas_train), axis=-1)

                    # each mixup batch consumes 2*batch_size source examples
                    itr_num = int(cur_item_num // (self.batch_size * 2))

                    for i in range(itr_num):
                        batch_ids = np.arange(cur_item_num)[i*self.batch_size * 2:(i + 1) * self.batch_size * 2]
                        X, y = self.__data_generation(batch_ids, X_train, y_train)
                        yield X, y

    def __get_exploration_order(self):
        """Return the (optionally shuffled) index order for one epoch pass."""
        indexes = np.arange(self.sample_num)

        if self.shuffle:
            np.random.shuffle(indexes)

        return indexes

    def __data_generation(self, batch_ids, X_train, y_train):
        """Build one mixup batch from 2*batch_size chunk-local row indices."""
        _, h, w, c = X_train.shape
        # per-example mixing weights
        l = np.random.beta(self.alpha, self.alpha, self.batch_size)
        X_l = l.reshape(self.batch_size, 1, 1, 1)
        y_l = l.reshape(self.batch_size, 1)

        X1 = X_train[batch_ids[:self.batch_size]]
        X2 = X_train[batch_ids[self.batch_size:]]

        for j in range(X1.shape[0]):

            # time & frequency masking, independently per channel
            # (the loop variable reuses the name `c` from the unpack above)
            for c in range(X1.shape[3]):
                X1[j, :, :, c] = frequency_masking(X1[j, :, :, c])
                X1[j, :, :, c] = time_masking(X1[j, :, :, c])
                X2[j, :, :, c] = frequency_masking(X2[j, :, :, c])
                X2[j, :, :, c] = time_masking(X2[j, :, :, c])

            # random cropping: move a random window to the front...
            if self.NewLength > 0:
                StartLoc1 = np.random.randint(0,X1.shape[2]-self.NewLength)
                StartLoc2 = np.random.randint(0,X2.shape[2]-self.NewLength)

                X1[j,:,0:self.NewLength,:] = X1[j,:,StartLoc1:StartLoc1+self.NewLength,:]
                X2[j,:,0:self.NewLength,:] = X2[j,:,StartLoc2:StartLoc2+self.NewLength,:]

        # ...then truncate every example to the cropped length
        if self.NewLength > 0:
            X1 = X1[:,:,0:self.NewLength,:]
            X2 = X2[:,:,0:self.NewLength,:]

        # mixup of inputs
        X = X1 * X_l + X2 * (1.0 - X_l)

        # mixup of targets (list form supports multi-output models)
        if isinstance(y_train, list):
            y = []

            for y_train_ in y_train:
                y1 = y_train_[batch_ids[:self.batch_size]]
                y2 = y_train_[batch_ids[self.batch_size:]]
                y.append(y1 * y_l + y2 * (1.0 - y_l))
        else:
            y1 = y_train[batch_ids[:self.batch_size]]
            y2 = y_train[batch_ids[self.batch_size:]]
            y = y1 * y_l + y2 * (1.0 - y_l)

        return X, y
-
-
def categorical_focal_loss(gamma=2., alpha=.25):
    """
    Build a softmax focal-loss function for Keras.

    FL = -alpha * (1 - p_c)^gamma * y_c * log(p_c), summed over the m classes
    and averaged over axis 1 per sample.

    Reference: https://github.com/umbertogriffo/focal-loss-keras
    Official paper: https://arxiv.org/pdf/1708.02002.pdf

    Parameters:
        alpha -- class-balance weighting factor (as in balanced cross entropy)
        gamma -- focusing parameter for the (1 - p)^gamma modulating factor
    Default values are the 2.0 / 0.25 used in the paper.

    Usage:
        model.compile(loss=[categorical_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
    """
    # NOTE: the inner function's name is part of the public contract — it is
    # referenced by name in `custom_objects` when the model is reloaded.
    def categorical_focal_loss_fixed(y_true, y_pred):
        """
        :param y_true: one-hot tensor of the same shape as `y_pred`
        :param y_pred: tensor resulting from a softmax
        :return: per-sample focal-loss tensor
        """
        # Renormalize so each sample's class probabilities sum to 1.
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

        # Clip away exact 0/1 so log() stays finite.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)

        # Per-class cross entropy, then the focal modulation.
        xent = -y_true * K.log(y_pred)
        focal = alpha * K.pow(1 - y_pred, gamma) * xent

        # Mean over the class axis, as in the reference implementation.
        return K.mean(focal, axis=1)

    return categorical_focal_loss_fixed
-
- # 自定义 checkpoint
- class MetaCheckpoint(tensorflow.keras.callbacks.ModelCheckpoint):
- def __init__(self, filepath, monitor='val_loss', verbose=0,
- save_best_only=False, save_weights_only=False,
- mode='auto', save_freq='epoch', training_args=None, meta=None):
-
- super(MetaCheckpoint, self).__init__(filepath,
- monitor=monitor,
- verbose=verbose,
- save_best_only=save_best_only,
- save_weights_only=save_weights_only,
- mode=mode,
- save_freq=save_freq)
-
- self.filepath = filepath
- self.new_file_override = True
- self.meta = meta or {'epochs': [], self.monitor: []}
-
- if training_args:
- self.meta['training_args'] = training_args
-
- def on_train_begin(self, logs={}):
- if self.save_best_only:
- if 'accuracy' in self.monitor or self.monitor.startswith('fmeasure'):
- self.best = max(self.meta[self.monitor], default=-np.Inf)
- else:
- self.best = min(self.meta[self.monitor], default=np.Inf)
-
- super(MetaCheckpoint, self).on_train_begin(logs)
-
- def on_epoch_end(self, epoch, logs={}):
- # 只有在‘只保存’最优版本且生成新的.h5文件的情况下
- if self.save_best_only:
- current = logs.get(self.monitor)
- if self.monitor_op(current, self.best):
- self.new_file_override = True
- else:
- self.new_file_override = False
-
- super(MetaCheckpoint, self).on_epoch_end(epoch, logs)
-
- # Get statistics
- self.meta['epochs'].append(epoch)
- for k, v in logs.items():
- # Get default gets the value or sets (and gets) the default value
- self.meta.setdefault(k, []).append(v)
-
- # Save to file
- filepath = self.filepath.format(epoch=epoch, **logs)
-
- if self.new_file_override and self.epochs_since_last_save == 0:
- # 只有在‘只保存’最优版本且生成新的.h5文件的情况下 才会继续添加meta
- with h5py.File(filepath, 'r+') as f:
- meta_group = f.create_group('meta')
- meta_group.attrs['training_args'] = yaml.dump(
- self.meta.get('training_args', '{}'))
- meta_group.create_dataset('epochs', data=np.array(self.meta['epochs']))
- for k in logs:
- meta_group.create_dataset(k, data=np.array(self.meta[k]))
-
- # 实现断点续传功能
- def load_meta(model_fname):
- """
- Load meta configuration
- :param model_fname: model file name
- :return: meta info
- """
- meta = {}
-
- with h5py.File(model_fname, 'r') as f:
- meta_group = f['meta']
-
- meta['training_args'] = yaml.load(
- meta_group.attrs['training_args'])
- for k in meta_group.keys():
- meta[k] = list(meta_group[k])
-
- return meta
-
def get_last_status(model, savedir):
    """Resume support: if ``savedir/bestModel.h5`` exists, load its weights
    into ``model`` and return (last finished epoch, meta dict); otherwise
    return (-1, {})."""
    best_path = savedir + "/bestModel.h5"
    last_epoch, last_meta = -1, {}
    if os.path.exists(best_path):
        model.load_weights(best_path)
        last_meta = load_meta(best_path)
        last_epoch = last_meta.get('epochs')[-1]
    return last_epoch, last_meta
-
- #=========================================================================================================#
- #network definition
- def gca_block(x, b=1, gama=2):
-
- # 根据公式计算自适应卷积核大小
- kernel_size = int(abs((math.log(x.shape[-1], 2) + b) / gama))
- if kernel_size % 2:
- kernel_size = kernel_size
- else:
- kernel_size = kernel_size + 1
-
- x_shape = K.int_shape(x)
- [b, h, w, c] = x_shape
-
- x_h = AveragePooling2D(pool_size=(1, w), strides=1, padding='valid')(x)
- x_w = AveragePooling2D(pool_size=(h, 1), strides=1, padding='valid')(x)
- x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- y = tensorflow.concat([x_h, x_w], axis=1)
-
- y = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(y)
- y = Activation('gelu')(y)
-
- x_h, x_w = tensorflow.split(y, num_or_size_splits=[h, w], axis=1)
- x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- a_h = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(x_h)
- a_h = Activation('sigmoid')(a_h)
- a_w = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(x_w)
- a_w = Activation('sigmoid')(a_w)
-
- out = x * a_h * a_w
- return out
-
- # def eca_block(inputs, b=1, gama=2):
-
- # in_channel = inputs.shape[-1]
- # kernel_size = int(abs((math.log(in_channel, 2) + b) / gama)) # 根据公式计算自适应卷积核大小
- # if kernel_size % 2:
- # kernel_size = kernel_size
- # else:
- # kernel_size = kernel_size + 1
-
- # # [h,w,c]==>[None,c] 全局平均池化
- # x = GlobalAveragePooling2D()(inputs)
- # # [None,c]==>[c,1]
- # x = Reshape(target_shape=(in_channel, 1))(x)
- # # [c,1]==>[c,1]
- # x = Conv1D(filters=1, kernel_size=kernel_size, padding='same', use_bias=False)(x)
- # # sigmoid激活
- # x = Activation('sigmoid')(x)
- # # [c,1]==>[1,1,c]
- # x = Reshape((1,1,in_channel))(x)
-
- # outputs = multiply([inputs, x])
- # return outputs
-
- # def coordinate_attention(x, reduction_ratio=8):
-
- # def h_swish(x):
- # tmpx = tensorflow.nn.relu6(x+3) / 6
- # x = x * tmpx
- # return x
-
- # x_shape = K.int_shape(x)
- # [b, h, w, c] = x_shape
-
- # x_h = AveragePooling2D(pool_size=(1, w), strides=1, padding='valid')(x)
- # x_w = AveragePooling2D(pool_size=(h, 1), strides=1, padding='valid')(x)
- # x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- # y = tensorflow.concat([x_h, x_w], axis=1)
-
- # mip = max(8, c // reduction_ratio)
- # y = Conv2D(mip, (1, 1), strides=1, padding='valid')(y)
-
- # y = BatchNormalization(axis=-1)(y)
- # y = h_swish(y)
-
- # x_h, x_w = tensorflow.split(y, num_or_size_splits=[h, w], axis=1)
- # x_w = tensorflow.transpose(x_w, [0, 2, 1, 3])
-
- # a_h = Conv2D(c, (1, 1), strides=1, padding='same')(x_h)
- # a_h = Activation('sigmoid')(a_h)
- # a_w = Conv2D(c, (1, 1), strides=1, padding='same')(x_w)
- # a_w = Activation('sigmoid')(a_w)
-
- # out = x * a_h * a_w
- # return out
-
- # def squeeze_and_excite(inputs, ratio=4):
- # squeeze = inputs.shape[-1] // ratio # 第一个FC降低通道数个数
- # excitation = inputs.shape[-1] # 第二个FC上升通道数个数
-
- # x = GlobalAveragePooling2D()(inputs)
- # x = Dense(squeeze)(x)
- # x = Activation('relu')(x)
- # x = Dense(excitation)(x)
- # x = Activation('sigmoid')(x)
-
- # x = multiply([inputs, x])
- # return x
-
def ghost_conv(inputs, out_channels, use_relu=True):
    """GhostNet-style "ghost" convolution producing ``out_channels`` maps.

    Half the channels come from a cheap 1x1 conv; the other half are "ghost"
    features generated from them by a 3x3 depthwise conv; the two halves are
    concatenated. ``use_relu`` toggles the activations after each half.
    """

    # primary features: pointwise conv + BN (+ activation)
    x1 = Conv2D(out_channels // 2, kernel_size=(1,1), strides=1,
                padding="same", use_bias=False)(inputs)
    x1 = BatchNormalization()(x1)
    if use_relu:
        # NOTE(review): the first half uses 'gelu' while the second half
        # below uses 'relu' despite the single `use_relu` flag — possibly
        # intentional, but worth confirming against the original design.
        x1 = Activation('gelu')(x1)
    # ghost features: depthwise 3x3 conv over the primary half
    x2 = Conv2D(out_channels // 2, kernel_size=(3, 3), strides=1,
                padding="same", use_bias=False, groups=out_channels // 2)(x1)
    x2 = BatchNormalization()(x2)
    if use_relu:
        x2 = Activation('relu')(x2)
    return concatenate([x1, x2], axis=-1)
-
def resnet_layer(inputs,num_filters=16,kernel_size=3,strides=1,learn_bn = True,wd=1e-4, use_relu=True):
    """Standard ResNet building block: Conv2D -> BatchNorm -> optional ReLU.

    ``learn_bn`` controls whether the BN scale/center parameters are learned;
    ``wd`` is the L2 weight-decay factor on the conv kernel.
    """
    y = Conv2D(num_filters,
               kernel_size=kernel_size,
               strides=strides,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(wd),
               use_bias=False)(inputs)
    y = BatchNormalization(center=learn_bn, scale=learn_bn)(y)
    if use_relu:
        y = Activation('relu')(y)
    return y
-
def pad_depth(inputs, desired_channels):
    # NOTE(review): ``desired_channels`` is never used — the function simply
    # returns an all-zeros tensor shaped like ``inputs``. Presumably intended
    # as a zero-padding helper for shortcut branches; no caller is visible in
    # this file, so confirm before relying on it.
    from keras import backend as K
    y = K.zeros_like(inputs, name='pad_depth1')
    return y
-
def My_freq_split1(x):
    """Return the lower half of the frequency axis (bins 0-63) of an NHWC
    tensor/array shaped (batch, 128, time, channels).

    The unused local ``from keras import backend as K`` import was removed.
    """
    return x[:, 0:64, :, :]
-
def My_freq_split2(x):
    """Return the upper half of the frequency axis (bins 64-127) of an NHWC
    tensor/array shaped (batch, 128, time, channels).

    The unused local ``from keras import backend as K`` import was removed.
    """
    return x[:, 64:128, :, :]
-
def RG_bneck(inputs, num_filters, My_wd, num_stacks):
    """Residual-GhostNet backbone: a two-branch "early fusion" stem followed
    by ``num_stacks`` stacks of two ghost-conv residual bottleneck blocks.

    Each stack after the first downsamples by stride 2 and the filter count
    doubles per stack. Returns the final feature map.
    """

    num_res_blocks=2

    #===== early fusion: two stems with different strides, each followed by
    # coordinate attention, concatenated along the time axis =====
    ResidualPath1 = resnet_layer(inputs=inputs,
                                 num_filters=num_filters,
                                 strides=2,
                                 learn_bn = True,
                                 wd=My_wd,
                                 use_relu = False)
    ResidualPath1 = gca_block(ResidualPath1)

    ResidualPath2 = resnet_layer(inputs=inputs,
                                 num_filters=num_filters,
                                 strides=[2,1],
                                 learn_bn = True,
                                 wd=My_wd,
                                 use_relu = False)
    ResidualPath2 = gca_block(ResidualPath2)
    ResidualPath = tensorflow.concat([ResidualPath1, ResidualPath2], axis=2)
    #========================

    # Single-stem alternative kept for reference:
    # ResidualPath = resnet_layer(inputs=inputs,
    #                             num_filters=num_filters,
    #                             strides=2,
    #                             learn_bn = True,
    #                             wd=My_wd,
    #                             use_relu = False)

    # Instantiate the stack of residual units
    for stack in range(num_stacks):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample

            if strides != 1:
                # Downsampling block: expand channels 4x with a ghost conv...
                ConvPath = ghost_conv(inputs=ResidualPath, out_channels=ResidualPath.shape[-1]*4)
                # ...depthwise 3x3 conv performs the stride-2 downsampling...
                ConvPath = Conv2D(ConvPath.shape[-1], kernel_size=(3, 3), strides=strides,
                                  padding="same", use_bias=False, groups=ConvPath.shape[-1])(ConvPath)
                ConvPath = BatchNormalization()(ConvPath)
                # ...project back down WITHOUT a nonlinearity (an activation
                # here would destroy information in the bottleneck).
                ConvPath = ghost_conv(inputs=ConvPath, out_channels=num_filters, use_relu=False)

            else:
                # Identity-resolution block: expand then project, plus the
                # residual sum with the incoming path.
                ConvPath = ghost_conv(inputs=ResidualPath, out_channels=ResidualPath.shape[-1]*4)
                ConvPath = ghost_conv(inputs=ConvPath, out_channels=num_filters, use_relu=False)
                ConvPath = tensorflow.keras.layers.add([ConvPath,ResidualPath])

            if stack > 0 and res_block == 0:
                # Match the shortcut to the new resolution/width: depthwise
                # stride-2 conv then a pointwise projection to num_filters.
                ResidualPath = Conv2D(ResidualPath.shape[-1], kernel_size=(3, 3), strides=strides,
                                      padding="same", use_bias=False, groups=ResidualPath.shape[-1])(ResidualPath)
                ResidualPath = BatchNormalization()(ResidualPath)
                ResidualPath = Conv2D(num_filters, kernel_size=(1,1), strides=1,
                                      padding="same", use_bias=False)(ResidualPath)
                ResidualPath = BatchNormalization()(ResidualPath)

                ResidualPath = tensorflow.keras.layers.add([ConvPath,ResidualPath])

        #when we are here, we double the number of filters
        num_filters *= 2

    return ResidualPath
-
-
def RGhostnet(num_classes,input_shape =[128,None,6], num_filters =24,
              wd=1e-3, num_stacks=4, output_num_filters_factor=1, use_split=False):
    """Build the R-GhostNet classifier.

    When ``use_split`` is True the 128-bin frequency axis is split into two
    64-bin branches, each run through its own RG_bneck backbone, then
    concatenated; otherwise a single backbone is used. The head is two 1x1
    conv "resnet_layer"s, and the output is the average of two softmaxed
    global-average-pooled halves of the time axis ("late fusion").
    """

    My_wd = wd  # this is 5e-3 in matlab, so quite large

    inputs = Input(shape=input_shape)
    x = inputs

    if use_split:
        #split up frequency into two branches (bins 0-63 and 64-127)
        Split1= Lambda(My_freq_split1)(x)
        Split2= Lambda(My_freq_split2)(x)

        ResidualPath1 = RG_bneck(Split1, num_filters, My_wd, num_stacks)
        ResidualPath2 = RG_bneck(Split2, num_filters, My_wd, num_stacks)
        ResidualPath = concatenate([ResidualPath1,ResidualPath2],axis=1)
    else:
        ResidualPath = RG_bneck(x, num_filters, My_wd, num_stacks)

    # RG_bneck doubled its internal filter count once per stack; replay the
    # doubling here so the head width matches the backbone output.
    for _ in range(num_stacks):
        num_filters *= 2

    # 1x1 conv head (wide), with BN affine params disabled.
    OutputPath = resnet_layer(inputs=ResidualPath,
                              num_filters=num_filters*output_num_filters_factor,
                              kernel_size=1,
                              strides=1,
                              learn_bn = False,
                              wd=My_wd,
                              use_relu = True)

    OutputPath = Dropout(0.3)(OutputPath)
    # OutputPath = gca_block(OutputPath)

    #output layers after last sum: 1x1 conv down to num_classes maps
    OutputPath = resnet_layer(inputs=OutputPath,
                              num_filters=num_classes,
                              strides = 1,
                              kernel_size=1,
                              learn_bn = False,
                              wd=My_wd,
                              use_relu=False)
    OutputPath = BatchNormalization(center=False, scale=False)(OutputPath)
    # OutputPath = GlobalAveragePooling2D()(OutputPath)
    # OutputPath = Activation('softmax')(OutputPath)

    #===== late fusion: split the time axis in half (odd widths give the
    # first half the extra column), softmax each half separately, average =====
    OutputPathShape = K.int_shape(OutputPath)
    if OutputPathShape[2]%2 == 1:
        tmp = int(OutputPathShape[2]/2)
        OutputPath1, OutputPath2 = tensorflow.split(OutputPath, num_or_size_splits=[tmp+1, tmp], axis=2)
    else:
        OutputPath1, OutputPath2 = tensorflow.split(OutputPath, num_or_size_splits=2, axis=2)

    # OutputPath1 = gca_block(OutputPath1)
    OutputPath1 = GlobalAveragePooling2D()(OutputPath1)
    OutputPath1 = Activation('softmax')(OutputPath1)

    OutputPath2 = GlobalAveragePooling2D()(OutputPath2)
    OutputPath2 = Activation('softmax')(OutputPath2)

    OutputPath = 0.5*OutputPath1 + 0.5*OutputPath2
    #========================

    # Instantiate model.
    model = Model(inputs=inputs, outputs=OutputPath)
    return model
-
###############################################
#create and compile the model

if model_selection == 0: # R-GhostNet
    model = RGhostnet(num_classes,
                      input_shape =[num_freq_bin,num_time_bin,num_audio_channels],
                      num_filters = num_filters,
                      wd=wd,
                      num_stacks = num_stacks,
                      output_num_filters_factor = output_num_filters_factor,
                      use_split = use_split)

elif model_selection == 1: # network under test (not implemented)
    # NOTE(review): placeholder — selecting 1 would make model.summary()
    # below fail since `model` is an int.
    model = 1


model.summary()

# NOTE(review): SGD(lr=..., decay=...) is legacy-Keras argument naming;
# newer tf.keras optimizers use `learning_rate`.
if focal_loss:
    model.compile(loss=[categorical_focal_loss(gamma=gamma, alpha=alpha)],
                  optimizer =SGD(lr=max_lr,decay=0, momentum=0.9, nesterov=False),
                  metrics=['accuracy'])
else:
    model.compile(loss='categorical_crossentropy',
                  optimizer =SGD(lr=max_lr,decay=0, momentum=0.9, nesterov=False),
                  metrics=['accuracy'])

#=========================================================================================================#
# Resume-from-checkpoint bookkeeping (disabled; uncomment to resume).
# last_epoch, last_meta = get_last_status(model, savedir)
last_epoch = -1
last_meta = {}

#set learning rate schedule (cosine annealing with warm restarts)
lr_scheduler = LR_WarmRestart(nbatch=np.ceil(sample_num/batch_size),Tmult=2,
                              initial_lr=max_lr, min_lr=max_lr*1e-4,
                              epochs_restart = [3.0, 7.0, 15.0, 31.0, 63.0, 127.0, 255.0])

# checkpoint callback: saves the best model by val_accuracy plus meta info
ckpt_path=savedir + '/bestModel.h5' # where the checkpoint is written
cp_callback = MetaCheckpoint(filepath=ckpt_path,
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             monitor='val_accuracy',
                             mode='max',
                             meta=last_meta)

callbacks = [lr_scheduler,cp_callback]


#create data generator (mixup + SpecAugment, streamed in 20 chunks)
TrainDataGen = MixupGenerator_splitted(feat_path,
                                       train_csv,
                                       batch_size=batch_size,
                                       alpha=mixup_alpha,
                                       splitted_num=20,
                                       crop_length=crop_length)()

#train the model
history = model.fit(TrainDataGen,
                    validation_data=(data_val, y_val),
                    epochs=num_epochs,
                    verbose=2,
                    workers=8,
                    shuffle=True,
                    # workers=1, # use for fixed-seed / deterministic runs
                    # shuffle=False, # use for fixed-seed / deterministic runs
                    max_queue_size = 100,
                    callbacks=callbacks,
                    steps_per_epoch=np.ceil(sample_num/batch_size),
                    initial_epoch=last_epoch+1)


# # inspect the saved meta history
# last_meta = load_meta(savedir + "/bestModel.h5")
# print(last_meta.get('val_accuracy'))
-
#=================================== inference ============================================
dev_test_df = pd.read_csv(val_csv,sep='\t', encoding='ASCII')
wav_paths = dev_test_df['filename'].tolist()
ClassNames = np.unique(dev_test_df['scene_label'])

# Reduce each wav path to the token after the last '-' in the stem —
# presumably the recording-device id in DCASE-style filenames
# ("scene-location-...-device.wav"); confirm against the dataset naming.
for idx, elem in enumerate(wav_paths):
    wav_paths[idx] = wav_paths[idx].split('/')[-1].split('.')[0]
    wav_paths[idx] = wav_paths[idx].split('-')[-1]

device_idxs = wav_paths
device_list = np.unique(device_idxs)
print(device_list)

# Reload the best checkpoint; the focal loss must be re-registered by the
# inner function's name when it was used for training.
if focal_loss:
    best_model = keras.models.load_model(model_path, custom_objects={'categorical_focal_loss_fixed': categorical_focal_loss(gamma=1.0, alpha=0.3)})
else:
    best_model = keras.models.load_model(model_path)

preds = best_model.predict(data_val)
y_pred_val = np.argmax(preds,axis=1)

# Overall metrics over the whole validation split.
over_loss = log_loss(y_val_onehot, preds)
overall_acc = np.sum(y_pred_val==y_val_infer) / data_val.shape[0]

print(y_val_onehot.shape, preds.shape)
np.set_printoptions(precision=3)

print("\n\nVal acc: ", "{0:.5f}".format(overall_acc))
print("Val log loss:", "{0:.5f}".format(over_loss))

# Per-class accuracy broken down by recording device.
class_device_acc = {}
for class_name in ClassNames:
    class_device_acc[class_name] = []
    for device_id in device_list:
        cur_y_pred_val = np.array([y_pred_val[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id and y_val_infer[i] == ClassNames.tolist().index(class_name)])
        cur_y_val = [y_val_infer[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id and y_val_infer[i] == ClassNames.tolist().index(class_name)]
        if len(cur_y_pred_val) > 0:
            cur_acc = np.sum(cur_y_pred_val==cur_y_val) / len(cur_y_pred_val)
            class_device_acc[class_name].append(cur_acc)
        else:
            # no samples of this class for this device
            class_device_acc[class_name].append(0)

print("\nPer-class device val acc: ")
for class_name in ClassNames:
    print(["{:.3f}".format(num) for num in class_device_acc[class_name]])

# Per-device accuracy and log loss.
device_acc = []
device_loss = []
for device_id in device_list:
    cur_preds = np.array([preds[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id])
    cur_y_pred_val = np.argmax(cur_preds,axis=1)
    cur_y_val_onehot = np.array([y_val_onehot[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id])
    cur_y_val = [y_val_infer[i] for i in range(len(device_idxs)) if device_idxs[i] == device_id]
    cur_loss = log_loss(cur_y_val_onehot, cur_preds)
    cur_acc = np.sum(cur_y_pred_val==cur_y_val) / len(cur_preds)

    device_acc.append(cur_acc)
    device_loss.append(cur_loss)

# NOTE(review): the grouped prints below assume devices sort as
# [A, B, C, s1..s6] — i.e. at least 9 devices in np.unique order.
print("\n\nDevices list: ", device_list)
print("Per-device val acc : ", np.array(device_acc))
print("Device A acc: ", "{0:.3f}".format(device_acc[0]))
print("Device B & C acc: ", "{0:.3f}".format((device_acc[1] + device_acc[2]) / 2))
print("Device s1 & s2 & s3 acc: ", "{0:.3f}".format((device_acc[3] + device_acc[4] + device_acc[5]) / 3))
print("Device s4 & s5 & s6 acc: ", "{0:.3f}".format((device_acc[6] + device_acc[7] + device_acc[8]) / 3))


# get confusion matrix; the diagonal of the row-normalized matrix is the
# per-class recall (== per-class accuracy).
conf_matrix = confusion_matrix(y_val_infer,y_pred_val)
print("\n\nConfusion matrix:")
# print(conf_matrix)
conf_matrix_a = np.array(conf_matrix)
conf_matrix_comma = np.array2string(conf_matrix_a, separator=', ')
print(conf_matrix_comma)
conf_mat_norm_recall = conf_matrix.astype('float32')/conf_matrix.sum(axis=1)[:,np.newaxis]
recall_by_class = np.diagonal(conf_mat_norm_recall)
mean_recall = np.mean(recall_by_class)

print("Class names:", ClassNames)
print("Per-class val acc: ",recall_by_class, "\n\n")
|