-
Notifications
You must be signed in to change notification settings - Fork 1
/
ex_situ.py
78 lines (72 loc) · 3.7 KB
/
ex_situ.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import sys as sys
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import cm
from scipy.io import wavfile
import numpy as np
import evfuncs
from pydub import AudioSegment
import string
import random
from scipy import signal
from pptx import Presentation
from pptx.util import Inches
import os
import csv
import math
import time
import scipy.io
def make_spectrogram(wav_file):
    """Plot the spectrogram of ``wav_file``, show it, and return the plot objects.

    The figure is drawn without axes or background patch, limited to
    0-10000 on the frequency axis, and made wider for longer recordings.
    A ``TextBox`` widget wired to ``submit`` is attached before the figure
    is shown with ``plt.show()``.

    Args:
        wav_file: Path of the wav file to read with ``scipy.io.wavfile.read``.

    Returns:
        A dict with keys ``'ax'`` (the axes), ``'fig'`` (the figure) and
        ``'pxx'`` (the first value returned by ``Axes.specgram``).
    """
    # Raw samples plus their sampling rate.
    sample_rate, samples = wavfile.read(wav_file)

    fig, ax = plt.subplots(1)
    # Hide the figure background so no white frame surrounds the spectrogram -- from
    # https://stackoverflow.com/questions/14908576/how-to-remove-frame-from-matplotlib-pyplot-figure-vs-matplotlib-figure-frame
    fig.patch.set_visible(False)
    # No time (x) or frequency (y) axis decorations.
    ax.axis('off')

    spectrum, _freqs, _bins, _image = ax.specgram(
        x=samples,
        Fs=sample_rate,
        noverlap=511,
        NFFT=512,
        cmap=cm.jet,
        scale='dB',
    )
    ax.set_ylim([0, 10000])

    # Width grows with the number of samples (one inch per 2400 samples),
    # so longer snippets produce wider images; height is fixed at 5 inches.
    fig.set_size_inches(len(samples) / 2400, 5)

    # Keep a reference to the widget until the figure has been shown.
    label_box = matplotlib.widgets.TextBox(ax, 'Enter label:')
    label_box.on_submit(submit)

    plt.show()
    return dict(ax=ax, fig=fig, pxx=spectrum)
def submit(input_str: str) -> None:
    """Store ``input_str`` in the module-level ``label`` variable.

    Used as the ``on_submit`` callback of the label text box, so the text
    the user submits there becomes the current label.
    """
    global label
    label = input_str
NOTMAT_SUFFIX = '.not.mat'  # appended to a wav file name to get its annotation file name
SNIPPET_PAD_MS = 20  # milliseconds of audio kept before and after each syllable
QUIT_COMMAND = 'quit'  # entering this as a label ends the labelling session

# Most recently entered label. submit() also writes this through `global label`.
label = '-'


def read_batch_list(directory):
    """Return the paths of the annotation files listed in ``directory/notbatch``.

    Each non-empty line of the ``notbatch`` file is taken as a file name
    relative to ``directory``. The last line does not need a trailing newline.
    """
    batch_path = os.path.join(directory, 'notbatch')
    with open(batch_path) as batch_file:
        names = [line.rstrip('\r\n') for line in batch_file]
    return [os.path.join(directory, name) for name in names if name]


def wav_path_for(notmat_path):
    """Return ``notmat_path`` without its trailing ``.not.mat`` suffix.

    ``str.strip('.not.mat')`` must not be used here: it removes any of those
    characters from both ends of the string rather than the suffix itself.
    """
    if notmat_path.endswith(NOTMAT_SUFFIX):
        return notmat_path[:-len(NOTMAT_SUFFIX)]
    return notmat_path


def padded_bounds(onset, offset, pad=SNIPPET_PAD_MS):
    """Return ``(start, end)`` in ms: the syllable widened by ``pad`` on each side.

    The start is clamped at 0 because a negative slice index on an
    ``AudioSegment`` counts from the end of the audio instead of the start.
    """
    return max(0, onset - pad), offset + pad


def replace_label(labels, index, new_label):
    """Return ``labels`` with the character at ``index`` replaced by ``new_label``."""
    chars = list(labels)
    chars[index] = new_label
    return ''.join(chars)


def prompt_for_label(prompt):
    """Ask until the user enters a single character or the quit command.

    The labels string holds one character per syllable, so a label of any
    other length would shift every later syllable's label out of position.
    """
    while True:
        answer = input(prompt)
        if answer == QUIT_COMMAND or len(answer) == 1:
            return answer
        print('Please enter exactly one character, or "quit" to stop.')


def main():
    """Relabel, in random order, every syllable of every file in the batch.

    The directory to process is read from ``sys.argv[1]``. For each syllable
    a padded audio snippet is written to ``syl.wav`` in that directory, its
    spectrogram is shown, and the label typed by the user is saved back to
    the annotation file immediately so that progress survives an early quit.
    """
    global label  # shared with submit(), which the spectrogram text box calls

    if len(sys.argv) < 2:
        sys.exit('usage: python ex_situ.py <directory>')
    directory = sys.argv[1]
    snippet_path = os.path.join(directory, 'syl.wav')

    for notmat_path in read_batch_list(directory):
        if label == QUIT_COMMAND:
            break

        # Annotation variables for this song, and the audio they describe.
        ndict = evfuncs.load_notmat(notmat_path)
        file_audio = AudioSegment.from_wav(wav_path_for(notmat_path))

        onsets = ndict['onsets']
        # A scalar has no length to iterate over, so such files are skipped.
        if np.ndim(onsets) > 0:
            order = list(range(len(onsets)))
            random.shuffle(order)
            for i in order:
                # Onset/offset times are in milliseconds within the file.
                start_ms, end_ms = padded_bounds(onsets[i], ndict['offsets'][i])
                file_audio[start_ms:end_ms].export(snippet_path, format='wav')
                make_spectrogram(snippet_path)

                label = prompt_for_label(
                    'Syllable {} of {} in {}. Label:'.format(
                        i, len(onsets), notmat_path
                    )
                )
                if label == QUIT_COMMAND:
                    break

                ndict['labels'] = replace_label(ndict['labels'], i, label)
                scipy.io.savemat(notmat_path, ndict)

        print(ndict['labels'])


if __name__ == '__main__':
    main()