# Chen / dcase

 
			
							# 波形图
import librosa
import librosa.display
import matplotlib.pyplot as plt

# Load the three recordings that are overlaid in the waveform figure.
audio_files = ['shopping_mall-barcelona-126-3744-a.wav', 'shopping_mall-barcelona-126-3744-b.wav', 'shopping_mall-barcelona-126-3744-c.wav']
loaded = [librosa.load(path) for path in audio_files]
y = [signal for signal, _ in loaded]
sr = [rate for _, rate in loaded]

# One axes object shared by all three waveforms.
fig, ax = plt.subplots(figsize=(23, 9))

# Overlay the waveforms; each later file is drawn with a lower alpha
# (1, 1/2, 1/3).
# NOTE(review): `librosa.display.waveplot` was removed in librosa 0.10 in
# favour of `waveshow`; this call needs an older librosa -- confirm the
# pinned version before upgrading.
for i, (signal, rate) in enumerate(zip(y, sr)):
    librosa.display.waveplot(signal, sr=rate, alpha=1/(i+1), ax=ax)

# Zoom in on a short excerpt of the time axis.
ax.set_xlim([4.3, 4.8])

# Font size of the tick labels on both axes.
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)

# Legend entries, in plotting order.
ax.legend(['A','B','C'], fontsize=20)

# Axis labels are set BEFORE saving so that the PNG on disk carries them too
# (previously they were applied after savefig and only appeared on screen).
ax.set_xlabel('Time(seconds)')
ax.set_ylabel('Amplitude')

fig.savefig('audio_waveform.png')

# Show the figure.
plt.show()

#######################################################################
# 对数梅尔谱图
import numpy as np

# Recording used for the log-mel spectrogram figure.
audio_file = 'airport-london-5-230-a.wav'
y, sr = librosa.load(audio_file)

# Mel spectrogram parameters.
n_fft = 2048
hop_length = 1024
n_mels = 128

# Mel power spectrogram, converted to dB relative to its maximum value.
S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
log_S = librosa.power_to_db(S, ref=np.max)

# Draw the spectrogram as a bare image: no x/y axis types are requested and
# no colorbar or title is added.
plt.figure(figsize=(9, 9))
librosa.display.specshow(log_S, sr=sr, hop_length=hop_length)

# Apply the layout BEFORE saving so the file on disk matches what is shown
# (previously tight_layout ran after savefig and only affected the window).
plt.tight_layout()
plt.savefig('logmeel-a.png')

# Show the figure.
plt.show()

#######################################################################
# Z-score后的对数梅尔谱图
import pickle

# Load a pre-computed feature file and keep its 'feat_data' entry.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only use
# it on feature files produced by a trusted pipeline.
# NOTE(review): encoding='latin1' is presumably needed because the file was
# pickled under Python 2 -- confirm.
with open('bus-barcelona-15-599-a.logmel', 'rb') as f:
    temp = pickle.load(f, encoding='latin1')
feat_mtx = [temp['feat_data']]

# The one-element list adds a leading axis of length 1; squeeze removes it
# together with axis 3 (assumes 'feat_data' is 3-D with a trailing singleton
# axis, leaving a 2-D matrix -- TODO confirm).
logmel = np.squeeze(feat_mtx, axis=(0, 3))

plt.figure(figsize=(16, 9))
librosa.display.specshow(logmel, sr=44100, cmap='jet') # alternative: cmap='gray'

# Apply the layout BEFORE saving so the file on disk matches what is shown
# (previously tight_layout ran after savefig and only affected the window).
plt.tight_layout()
plt.savefig('1.png')

# Show the figure.
plt.show()


################################### 横向柱形图 #####################################
import matplotlib.pyplot as plt
from matplotlib import rcParams

from matplotlib.ticker import FuncFormatter

rcParams.update({'font.size': 24})

# Avoid the minus sign '-' being rendered as an empty box.
plt.rcParams['axes.unicode_minus'] = False

# Data: one accuracy value (in percent) per method, same order in both lists.
# The Mixup label is a raw string: '\m' is not a valid escape sequence, so the
# non-raw form triggers a warning on recent Python versions. The resulting
# text is unchanged.
methods = ['四种方法组合', '三种方法组合', '两种方法组合', '音频数据增强', '频谱矫正', '频谱增强', r'$\mathrm{Mixup}$数据增强', '基线']
accuracies = [53.8, 50.2, 48.8, 49.0, 47.6, 47.1, 47.9, 47.0]
len_x = len(methods)

# Figure size.
plt.figure(figsize=(16, 9))

plt.barh(range(len_x), accuracies, align='center', height=0.5)

# Method names as y tick labels, rendered in SimSun at a larger size.
texts = plt.yticks(range(len_x), methods, fontproperties='SimSun')[1]
for text in texts:
    text.set_fontsize(26)

plt.xlim(42.0, 56.0)

# Format x ticks as whole-number percentages. A formatter keeps the labels
# tied to the actual tick positions, unlike set_xticklabels with a fixed list
# of strings, which goes stale if the ticks change.
plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, pos: '{:.0f}%'.format(x)))

# Print each value just to the right of the end of its bar.
for i, v in enumerate(accuracies):
    plt.text(v + 0.8, i, '{}%'.format(v), ha='center', va='center')

# Frameless legend placed below the axes.
plt.legend(['平均分类准确率'], loc='lower center', bbox_to_anchor=(0.5, -0.175), prop={'family': 'SimSun', 'size': 26},
           facecolor='none', edgecolor='none')

# NOTE(review): '1.png' is also the output name used by the Z-score log-mel
# section earlier in this file; running the whole script overwrites that
# image -- consider a distinct file name.
plt.savefig('1.png')

# Show the figure.
plt.show()

################################### 混淆矩阵 #####################################
import matplotlib.pyplot as plt
import numpy as np

def plot_confusion_matrix(cm, classes=['机场', '公交车', '地铁', '地铁站', '公园', '广场', '商场', '步行街', '街道', '电车'],
                          normalize=False,
                          cmap=plt.cm.Blues,
                          png_name = 'base_origin_speccor.png'):
    """Draw a confusion matrix as an annotated heat map and save it to a file.

    Args:
        cm: 2-D array-like; cell (i, j) is drawn at row i, column j.
        classes: Tick labels used for both axes, one per row/column. The
            default list is only read here, never modified.
        normalize: If truthy, every row is divided by its sum before
            plotting. Rows whose sum is zero are left as zeros.
        cmap: Colormap passed to ``imshow``.
        png_name: Output path passed to ``savefig``.

    Returns:
        None. The figure is left open after saving.
    """
    # Accept plain nested lists as well as ndarrays.
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero so that an empty row
        # yields zeros instead of NaN (and no divide-by-zero warning).
        cm = np.divide(cm.astype(float), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    fig, ax = plt.subplots(figsize=(10, 10))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

    # One tick per row/column, labelled with the class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]))
    ax.set_xticklabels(classes, fontproperties='SimSun')
    ax.set_yticklabels(classes, fontproperties='SimSun')

    # Column labels go on top of the matrix, rotated, anchored at their left
    # end.
    ax.tick_params(axis='y', which='both', labelsize=22)
    ax.tick_params(axis='x', which='both', labelsize=22, top=True, bottom=False, labeltop=True, labelbottom=False, rotation=45)
    plt.setp(ax.get_xticklabels(), ha="left")

    # Integer matrices are printed as integers; anything else (normalized or
    # already-float input) with three decimals. 'd' would raise on floats.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.3f'

    # Cells above half of the maximum get white text for contrast on the
    # darker background; the rest get black text.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black",
                    fontsize=15)

    fig.tight_layout()
    # Save through the figure handle rather than pyplot's "current figure".
    fig.savefig(png_name)

# 10x10 count matrix. It is plotted with the default `classes` labels of
# plot_confusion_matrix, so row/column k corresponds to the k-th label there.
# NOTE(review): presumably rows are true classes and columns predictions --
# confirm against the evaluation code that produced these numbers.
confusion_matrix = [
    [172, 0, 1, 11, 2, 2, 51, 57, 0, 1],
    [0, 258, 13, 0, 7, 0, 0, 0, 2, 17],
    [4, 14, 217, 9, 3, 2, 0, 1, 2, 45],
    [17, 3, 19, 218, 2, 1, 27, 8, 1, 1],
    [4, 0, 3, 1, 234, 29, 0, 4, 19, 3],
    [8, 0, 2, 7, 53, 170, 8, 22, 27, 0],
    [43, 0, 0, 23, 0, 5, 195, 30, 0, 1],
    [29, 2, 1, 21, 6, 32, 50, 151, 5, 0],
    [2, 3, 2, 2, 16, 18, 0, 3, 251, 0],
    [0, 19, 49, 3, 9, 4, 0, 0, 2, 211],
]

# Convert to an ndarray before plotting.
cm = np.asarray(confusion_matrix)

# Raw counts, default labels, colormap and output file name.
plot_confusion_matrix(cm)