|
# Waveform plot: overlay a short time window of three recordings on one axes.
import librosa
import librosa.display
import matplotlib.pyplot as plt

# Load the three audio files, keeping each file's samples and sampling rate.
audio_files = ['shopping_mall-barcelona-126-3744-a.wav', 'shopping_mall-barcelona-126-3744-b.wav', 'shopping_mall-barcelona-126-3744-c.wav']
y = []
sr = []
for path in audio_files:
    samples, rate = librosa.load(path)
    y.append(samples)
    sr.append(rate)

# Create a single Axes object shared by all waveforms.
fig, ax = plt.subplots(figsize=(23, 9))

# librosa 0.10 removed display.waveplot in favour of display.waveshow;
# use whichever one this installation provides.
draw_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

# Draw every waveform; later files are drawn more transparently (alpha = 1, 1/2, 1/3).
# Labels are attached per call so the legend stays correct even when the
# drawing function adds more than one artist per waveform.
for idx, (samples, rate, label) in enumerate(zip(y, sr, ['A', 'B', 'C'])):
    draw_waveform(samples, sr=rate, alpha=1 / (idx + 1), ax=ax, label=label)

# Restrict the x axis to the window of interest.
ax.set_xlim(4.3, 4.8)

# Font size of the tick labels on both axes.
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)

# Legend built from the per-waveform labels.
ax.legend(fontsize=20)

# Axis labels must be set before saving, otherwise the PNG is written without them.
ax.set_xlabel('Time(seconds)')
ax.set_ylabel('Amplitude')

fig.savefig('audio_waveform.png')

# Show the figure.
plt.show()
-
#######################################################################
# Log-mel spectrogram of a single recording.
import numpy as np

# Load the audio file.
audio_file = 'airport-london-5-230-a.wav'
y, sr = librosa.load(audio_file)

# Compute the mel power spectrogram and convert it to dB relative to its peak.
n_fft = 2048
hop_length = 1024
n_mels = 128
S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
log_S = librosa.power_to_db(S, ref=np.max)

# Draw the spectrogram without axes decorations.
# For an annotated version, pass x_axis='time', y_axis='mel' to specshow and
# add plt.colorbar(format='%+02.0f dB') / plt.title('Mel spectrogram').
plt.figure(figsize=(9, 9))
librosa.display.specshow(log_S, sr=sr, hop_length=hop_length)

# Apply the layout before saving so the PNG matches what is shown on screen.
plt.tight_layout()
plt.savefig('logmeel-a.png')

# Show the figure.
plt.show()
-
#######################################################################
# Log-mel spectrogram loaded from a pre-computed feature file
# (the original section header describes it as "after z-score").
import pickle

# NOTE(security): pickle.load can execute arbitrary code while loading;
# only open feature files that come from a trusted source.
feat_mtx = []
with open('bus-barcelona-15-599-a.logmel', 'rb') as f:
    temp = pickle.load(f, encoding='latin1')
feat_mtx.append(temp['feat_data'])

# Wrapping the features in a list adds a leading axis of length 1; that axis
# and axis 3 are squeezed away, so 'feat_data' must be 3-D with a trailing
# singleton axis.
logmel = np.squeeze(feat_mtx, axis=(0, 3))

plt.figure(figsize=(16, 9))
librosa.display.specshow(logmel, sr=44100, cmap='jet')  # cmap='gray' is a greyscale alternative

# Apply the layout before saving so the PNG matches what is shown on screen.
plt.tight_layout()

# NOTE: the bar-chart section later in this script also writes '1.png', so this
# image is overwritten when the whole script runs.
plt.savefig('1.png')

# Show the figure.
plt.show()
-
-
################################### Horizontal bar chart #####################################
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.ticker import FuncFormatter

rcParams.update({'font.size': 24})

plt.rcParams['axes.unicode_minus'] = False  # avoid the minus sign '-' being rendered as a box

# Data: one accuracy value (in percent) per method.
# The Mixup entry is a raw string so the mathtext backslash is not treated as
# a (deprecated, invalid) string escape.
methods = ['四种方法组合', '三种方法组合', '两种方法组合', '音频数据增强', '频谱矫正', '频谱增强', r'$\mathrm{Mixup}$数据增强', '基线']
accuracies = [53.8, 50.2, 48.8, 49.0, 47.6, 47.1, 47.9, 47.0]
len_x = len(methods)

# Figure size.
plt.figure(figsize=(16, 9))

plt.barh(range(len_x), accuracies, align='center', height=0.5)
texts = plt.yticks(range(len_x), methods, fontproperties='SimSun')[1]
for text in texts:
    text.set_fontsize(26)
plt.xlim(42.0, 56.0)

# Format x ticks as percentages with a formatter instead of freezing the
# current tick labels, so labels stay in sync with the tick locations.
plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:.0f}%'.format(value)))

# Annotate each bar with its value, slightly to the right of the bar end.
for i, v in enumerate(accuracies):
    plt.text(v + 0.8, i, str(v)+'%', ha='center', va='center')

plt.legend(['平均分类准确率'], loc='lower center', bbox_to_anchor=(0.5, -0.175), prop={'family': 'SimSun', 'size': 26},
           facecolor='none', edgecolor='none')

plt.savefig('1.png')

# Show the figure.
plt.show()
-
################################### Confusion matrix #####################################
import matplotlib.pyplot as plt
import numpy as np


def plot_confusion_matrix(cm, classes=('机场', '公交车', '地铁', '地铁站', '公园', '广场', '商场', '步行街', '街道', '电车'),
                          normalize=False,
                          cmap=plt.cm.Blues,
                          png_name='base_origin_speccor.png'):
    """Draw a confusion matrix as an annotated heat map and save it to a file.

    Args:
        cm: 2-D array-like of values; a plain nested list is accepted.
        classes: Tick labels applied to both axes, one per row/column.
            A tuple is used as the default so it cannot be mutated between calls.
        normalize: When truthy, every row is divided by its row sum before
            plotting. Rows whose sum is zero are left as zeros instead of NaN.
        cmap: Matplotlib colormap handed to ``imshow``.
        png_name: Path of the image file that is written.

    Returns:
        None. The figure is saved to ``png_name`` and left open.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; empty rows stay 0.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    fig, ax = plt.subplots(figsize=(10, 10))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

    # Put one tick on every row and column and label them with the class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]))
    ax.set_xticklabels(classes, fontproperties='SimSun')
    ax.set_yticklabels(classes, fontproperties='SimSun')

    # Column labels go on top of the matrix, rotated by 45 degrees.
    ax.tick_params(axis='y', which='both', labelsize=22)
    ax.tick_params(axis='x', which='both', labelsize=22, top=True, bottom=False, labeltop=True, labelbottom=False, rotation=45)
    plt.setp(ax.get_xticklabels(), ha="left")

    # Float matrices (including normalised ones) are printed with three
    # decimals; everything else keeps the integer format.
    fmt = '.3f' if cm.dtype.kind == 'f' else 'd'
    # Cells above half of the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            value = cm[i, j]
            ax.text(j, i, format(value, fmt),
                    ha="center", va="center",
                    color="white" if value > thresh else "black",
                    fontsize=15)

    fig.tight_layout()
    fig.savefig(png_name)
-
# Raw 10x10 confusion-matrix values. Rows and columns are labelled with the
# default `classes` of plot_confusion_matrix, in that order.
# NOTE(review): presumably rows are true labels and columns predictions - confirm.
confusion_matrix = [
    [172,   0,   1,  11,   2,   2,  51,  57,   0,   1],
    [  0, 258,  13,   0,   7,   0,   0,   0,   2,  17],
    [  4,  14, 217,   9,   3,   2,   0,   1,   2,  45],
    [ 17,   3,  19, 218,   2,   1,  27,   8,   1,   1],
    [  4,   0,   3,   1, 234,  29,   0,   4,  19,   3],
    [  8,   0,   2,   7,  53, 170,   8,  22,  27,   0],
    [ 43,   0,   0,  23,   0,   5, 195,  30,   0,   1],
    [ 29,   2,   1,  21,   6,  32,  50, 151,   5,   0],
    [  2,   3,   2,   2,  16,  18,   0,   3, 251,   0],
    [  0,  19,  49,   3,   9,   4,   0,   0,   2, 211],
]

# Convert to an ndarray and plot with the function's default settings.
cm = np.asarray(confusion_matrix)
plot_confusion_matrix(cm)
|