Spaces:
Sleeping
Sleeping
import os | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import librosa | |
import panns_inference | |
from panns_inference import AudioTagging, SoundEventDetection, labels | |
def print_audio_tagging_result(clipwise_output): | |
"""Visualization of audio tagging result. | |
Args: | |
clipwise_output: (classes_num,) | |
""" | |
sorted_indexes = np.argsort(clipwise_output)[::-1] | |
# Print audio tagging top probabilities | |
for k in range(10): | |
print('{}: {:.3f}'.format(np.array(labels)[sorted_indexes[k]], | |
clipwise_output[sorted_indexes[k]])) | |
def plot_sound_event_detection_result(framewise_output): | |
"""Visualization of sound event detection result. | |
Args: | |
framewise_output: (time_steps, classes_num) | |
""" | |
out_fig_path = 'results/sed_result.png' | |
os.makedirs(os.path.dirname(out_fig_path), exist_ok=True) | |
classwise_output = np.max(framewise_output, axis=0) # (classes_num,) | |
idxes = np.argsort(classwise_output)[::-1] | |
idxes = idxes[0:5] | |
ix_to_lb = {i : label for i, label in enumerate(labels)} | |
lines = [] | |
for idx in idxes: | |
line, = plt.plot(framewise_output[:, idx], label=ix_to_lb[idx]) | |
lines.append(line) | |
plt.legend(handles=lines) | |
plt.xlabel('Frames') | |
plt.ylabel('Probability') | |
plt.ylim(0, 1.) | |
plt.savefig(out_fig_path) | |
print('Save fig to {}'.format(out_fig_path)) | |
if __name__ == '__main__': | |
"""Example of using panns_inferece for audio tagging and sound evetn detection. | |
""" | |
device = 'cpu' # 'cuda' | 'cpu' | |
audio_path = 'resources/R9_ZSCveAHg_7s.wav' | |
(audio, _) = librosa.core.load(audio_path, sr=32000, mono=True) | |
#print(audio) | |
plt.plot(audio) | |
plt.savefig('sample.png') | |
audio = audio[None, :] # (batch_size, segment_samples) | |
#print(audio) | |
print('------ Audio tagging ------') | |
at = AudioTagging(checkpoint_path=None, device=device) | |
(clipwise_output, embedding) = at.inference(audio) | |
"""clipwise_output: (batch_size, classes_num), embedding: (batch_size, embedding_size)""" | |
print_audio_tagging_result(clipwise_output[0]) | |
print('------ Sound event detection ------') | |
sed = SoundEventDetection(checkpoint_path=None, device=device) | |
framewise_output = sed.inference(audio) | |
"""(batch_size, time_steps, classes_num)""" | |
plot_sound_event_detection_result(framewise_output[0]) |