import os
import pickle

import librosa
import numpy as np
import soundfile

from convert_wavs import convert_audio

AVAILABLE_EMOTIONS = {
"neutral",
"calm",
"happy",
"sad",
"angry",
"fear",
"disgust",
"ps", # pleasant surprised
"boredom"
}

def get_label(audio_config):
    """Returns the label corresponding to which features are to be extracted
    e.g.:
    audio_config = {'mfcc': True, 'chroma': True, 'contrast': False, 'tonnetz': False, 'mel': False}
    get_label(audio_config): 'mfcc-chroma'
    """
    features = ["mfcc", "chroma", "mel", "contrast", "tonnetz"]
    # keep only the enabled features, joined in a fixed order
    return "-".join(feature for feature in features if audio_config[feature])

def get_dropout_str(dropout, n_layers=3):
    """Builds the dropout part of a model name from a single dropout rate
    or a list of per-layer rates."""
    if isinstance(dropout, list):
        return "_".join(str(d) for d in dropout)
    elif isinstance(dropout, float):
        # repeat the single rate once per layer
        return "_".join(str(dropout) for _ in range(n_layers))
    raise TypeError("`dropout` must be a float or a list of floats.")

def get_first_letters(emotions):
    """Returns the uppercased first letters of `emotions`, sorted alphabetically."""
    return "".join(sorted(e[0].upper() for e in emotions))

def extract_feature(file_name, **kwargs):
    """
    Extract features from the audio file `file_name`
    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - Mel spectrogram (mel)
        - Spectral contrast (contrast)
        - Tonnetz (tonnetz)
    e.g.:
    `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    try:
        # probe the file first; soundfile raises RuntimeError on formats it cannot read
        with soundfile.SoundFile(file_name) as sound_file:
            pass
    except RuntimeError:
        # not properly formatted, convert to 16000 sample rate & mono channel using ffmpeg
        basename = os.path.basename(file_name)
        dirname = os.path.dirname(file_name)
        name, _ = os.path.splitext(basename)
        new_basename = f"{name}_c.wav"
        new_filename = os.path.join(dirname, new_basename)
        v = convert_audio(file_name, new_filename)
        if v:
            raise RuntimeError("Converting the audio file failed, make sure `ffmpeg` is installed on your machine and added to PATH.")
    else:
        new_filename = file_name
    with soundfile.SoundFile(new_filename) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        # chroma and contrast are both computed from the short-time Fourier transform
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            # `y` must be passed by keyword; recent librosa versions dropped the positional audio argument
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result
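
# Example (a sketch, assuming librosa's defaults): MFCC contributes 40 values
# and chroma 12, so enabling both yields a 52-dimensional vector:
#   features = extract_feature("path/to/audio.wav", mfcc=True, chroma=True)
#   features.shape  # (52,)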

def get_best_estimators(classification):
    """
    Loads the estimators that are pickled in the `grid` folder.
    Note that if you want to use different or more estimators,
    you can fine-tune the parameters in the `grid_search.py` script
    and run it again (this may take hours).
    """
    # use context managers so the pickle files are properly closed
    if classification:
        with open("grid/best_classifiers.pickle", "rb") as f:
            return pickle.load(f)
    with open("grid/best_regressors.pickle", "rb") as f:
        return pickle.load(f)
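
# Example (a sketch, assuming `grid_search.py` has already generated the
# pickles under `grid/`):
#   estimators = get_best_estimators(classification=True)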

def get_audio_config(features_list):
    """
    Converts a list of features into a dictionary understandable by
    the `data_extractor.AudioExtractor` class
    """
    audio_config = {'mfcc': False, 'chroma': False, 'mel': False, 'contrast': False, 'tonnetz': False}
    for feature in features_list:
        if feature not in audio_config:
            # an unrecognized feature name is a bad value, not a bad type
            raise ValueError(f"Feature passed: {feature} is not recognized.")
        audio_config[feature] = True
    return audio_config
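
# Example (a sketch):
#   get_audio_config(["mfcc", "mel"])
#   -> {'mfcc': True, 'chroma': False, 'mel': True, 'contrast': False, 'tonnetz': False}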