# MidTermFeatures.py
from __future__ import print_function
import os
import time
import glob
import numpy as np
import matplotlib.pyplot as plt
from pyAudioAnalysis import utilities
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import ShortTermFeatures
eps = 0.00000001
""" Time-domain audio features """
def beat_extraction(short_features, window_size, plot=False):
"""
This function extracts an estimate of the beat rate for a musical signal.
ARGUMENTS:
- short_features: a np array (n_feats x numOfShortTermWindows)
- window_size: window size in seconds
RETURNS:
- bpm: estimates of beats per minute
- ratio: a confidence measure
"""
# Features that are related to the beat tracking task:
selected_features = [0, 1, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18]
max_beat_time = int(round(2.0 / window_size))
hist_all = np.zeros((max_beat_time,))
# for each feature
for ii, i in enumerate(selected_features):
        # difference threshold (2 x mean of consecutive differences)
dif_threshold = 2.0 * (np.abs(short_features[i, 0:-1] -
short_features[i, 1::])).mean()
if dif_threshold <= 0:
dif_threshold = 0.0000000000000001
# detect local maxima
[pos1, _] = utilities.peakdet(short_features[i, :], dif_threshold)
position_diffs = []
# compute histograms of local maxima changes
for j in range(len(pos1)-1):
position_diffs.append(pos1[j+1]-pos1[j])
histogram_times, histogram_edges = \
np.histogram(position_diffs, np.arange(0.5, max_beat_time + 1.5))
hist_centers = (histogram_edges[0:-1] + histogram_edges[1::]) / 2.0
histogram_times = \
histogram_times.astype(float) / short_features.shape[1]
hist_all += histogram_times
if plot:
plt.subplot(9, 2, ii + 1)
plt.plot(short_features[i, :], 'k')
for k in pos1:
plt.plot(k, short_features[i, k], 'k*')
f1 = plt.gca()
f1.axes.get_xaxis().set_ticks([])
f1.axes.get_yaxis().set_ticks([])
if plot:
plt.show(block=False)
plt.figure()
    # Get the beat as the argmax of the aggregated histogram:
max_indices = np.argmax(hist_all)
bpms = 60 / (hist_centers * window_size)
bpm = bpms[max_indices]
# ... and the beat ratio:
ratio = hist_all[max_indices] / (hist_all.sum() + eps)
if plot:
# filter out >500 beats from plotting:
hist_all = hist_all[bpms < 500]
bpms = bpms[bpms < 500]
plt.plot(bpms, hist_all, 'k')
plt.xlabel('Beats per minute')
plt.ylabel('Freq Count')
plt.show(block=True)
return bpm, ratio
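# Illustrative usage sketch (not part of the original module): beat_extraction()
# takes the short-term feature matrix plus the short-term step in seconds.
# "music.wav" is a placeholder path.
#
#   from pyAudioAnalysis import audioBasicIO, ShortTermFeatures
#   fs, x = audioBasicIO.read_audio_file("music.wav")
#   x = audioBasicIO.stereo_to_mono(x)
#   st_feats, _ = ShortTermFeatures.feature_extraction(x, fs,
#                                                      int(0.050 * fs),
#                                                      int(0.050 * fs))
#   bpm, conf = beat_extraction(st_feats, 0.050)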
def mid_feature_extraction(signal, sampling_rate, mid_window, mid_step,
short_window, short_step):
"""
Mid-term feature extraction
"""
short_features, short_feature_names = \
ShortTermFeatures.feature_extraction(signal, sampling_rate,
short_window, short_step)
n_stats = 2
n_feats = len(short_features)
mid_window_ratio = int(round(mid_window / short_step))
mt_step_ratio = int(round(mid_step / short_step))
mid_features, mid_feature_names = [], []
for i in range(n_stats * n_feats):
mid_features.append([])
mid_feature_names.append("")
# for each of the short-term features:
for i in range(n_feats):
cur_position = 0
num_short_features = len(short_features[i])
mid_feature_names[i] = short_feature_names[i] + "_" + "mean"
mid_feature_names[i + n_feats] = short_feature_names[i] + "_" + "std"
while cur_position < num_short_features:
end = cur_position + mid_window_ratio
if end > num_short_features:
end = num_short_features
cur_st_feats = short_features[i][cur_position:end]
mid_features[i].append(np.mean(cur_st_feats))
mid_features[i + n_feats].append(np.std(cur_st_feats))
cur_position += mt_step_ratio
mid_features = np.array(mid_features)
mid_features = np.nan_to_num(mid_features)
return mid_features, short_features, mid_feature_names
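# Illustrative usage sketch (not part of the original module):
# mid_feature_extraction() expects all windows and steps in SAMPLES (the
# wrappers below multiply seconds by the sampling rate). "speech.wav" is a
# placeholder path.
#
#   fs, x = audioBasicIO.read_audio_file("speech.wav")
#   x = audioBasicIO.stereo_to_mono(x)
#   mt, st, mt_names = mid_feature_extraction(x, fs,
#                                              round(1.0 * fs), round(1.0 * fs),
#                                              round(0.050 * fs),
#                                              round(0.050 * fs))
#   # mt has one row per mid-term statistic (mean/std of each short-term
#   # feature) and one column per mid-term window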
""" Feature Extraction Wrappers
- The first two feature extraction wrappers are used to extract
long-term averaged audio features for a list of WAV files stored in a
given category.
It is important to note that, one single feature is extracted per WAV
file (not the whole sequence of feature vectors)
"""
def directory_feature_extraction(folder_path, mid_window, mid_step,
short_window, short_step,
compute_beat=True):
"""
This function extracts the mid-term features of the WAVE files of a
particular folder.
The resulting feature vector is extracted by long-term averaging the
mid-term features.
Therefore ONE FEATURE VECTOR is extracted for each WAV file.
ARGUMENTS:
- folder_path: the path of the WAVE directory
- mid_window, mid_step: mid-term window and step (in seconds)
- short_window, short_step: short-term window and step (in seconds)
"""
mid_term_features = np.array([])
process_times = []
types = ('*.wav', '*.aif', '*.aiff', '*.mp3', '*.au', '*.ogg')
wav_file_list = []
for files in types:
wav_file_list.extend(glob.glob(os.path.join(folder_path, files)))
wav_file_list = sorted(wav_file_list)
wav_file_list2, mid_feature_names = [], []
for i, file_path in enumerate(wav_file_list):
print("Analyzing file {0:d} of {1:d}: {2:s}".format(i + 1,
len(wav_file_list),
file_path))
if os.stat(file_path).st_size == 0:
print(" (EMPTY FILE -- SKIPPING)")
continue
sampling_rate, signal = audioBasicIO.read_audio_file(file_path)
if sampling_rate == 0:
continue
t1 = time.time()
signal = audioBasicIO.stereo_to_mono(signal)
if signal.shape[0] < float(sampling_rate)/5:
print(" (AUDIO FILE TOO SMALL - SKIPPING)")
continue
wav_file_list2.append(file_path)
if compute_beat:
mid_features, short_features, mid_feature_names = \
mid_feature_extraction(signal, sampling_rate,
round(mid_window * sampling_rate),
round(mid_step * sampling_rate),
round(sampling_rate * short_window),
round(sampling_rate * short_step))
beat, beat_conf = beat_extraction(short_features, short_step)
else:
mid_features, _, mid_feature_names = \
mid_feature_extraction(signal, sampling_rate,
round(mid_window * sampling_rate),
round(mid_step * sampling_rate),
round(sampling_rate * short_window),
round(sampling_rate * short_step))
mid_features = np.transpose(mid_features)
mid_features = mid_features.mean(axis=0)
# long term averaging of mid-term statistics
if (not np.isnan(mid_features).any()) and \
(not np.isinf(mid_features).any()):
if compute_beat:
mid_features = np.append(mid_features, beat)
mid_features = np.append(mid_features, beat_conf)
if len(mid_term_features) == 0:
# append feature vector
mid_term_features = mid_features
else:
mid_term_features = np.vstack((mid_term_features, mid_features))
t2 = time.time()
duration = float(len(signal)) / sampling_rate
process_times.append((t2 - t1) / duration)
if len(process_times) > 0:
print("Feature extraction complexity ratio: "
"{0:.1f} x realtime".format((1.0 /
np.mean(np.array(process_times)))))
return mid_term_features, wav_file_list2, mid_feature_names
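# Illustrative usage sketch (not part of the original module): one long-term
# averaged feature vector per audio file in a folder. "audioData/speech" is a
# placeholder directory.
#
#   feats, files, names = directory_feature_extraction("audioData/speech",
#                                                      1.0, 1.0, 0.050, 0.050,
#                                                      compute_beat=True)
#   # feats.shape == (n_files, n_features); with compute_beat=True the last
#   # two columns are the beat estimate and its confidence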
def multiple_directory_feature_extraction(path_list, mid_window, mid_step,
short_window, short_step,
compute_beat=False):
"""
Same as dirWavFeatureExtraction, but instead of a single dir it
takes a list of paths as input and returns a list of feature matrices.
EXAMPLE:
[features, classNames] =
a.dirsWavFeatureExtraction(['audioData/classSegmentsRec/noise',
'audioData/classSegmentsRec/speech',
'audioData/classSegmentsRec/brush-teeth',
'audioData/classSegmentsRec/shower'], 1,
1, 0.02, 0.02);
It can be used during the training process of a classification model ,
in order to get feature matrices from various audio classes (each stored in
a separate path)
"""
# feature extraction for each class:
features = []
class_names = []
file_names = []
for i, d in enumerate(path_list):
f, fn, feature_names = \
directory_feature_extraction(d, mid_window, mid_step,
short_window, short_step,
compute_beat=compute_beat)
if f.shape[0] > 0:
# if at least one audio file has been found in the provided folder:
features.append(f)
file_names.append(fn)
if d[-1] == os.sep:
class_names.append(d.split(os.sep)[-2])
else:
class_names.append(d.split(os.sep)[-1])
return features, class_names, file_names
def multiple_directory_feature_extraction_no_avg(path_list, mid_window,
                                                 mid_step, short_window,
                                                 short_step):
    """
    Same as multiple_directory_feature_extraction, but uses
    directory_feature_extraction_no_avg, so each directory yields the full
    sequence of mid-term feature vectors (no long-term averaging).
    """
    print('Start multiple directory non-averaged feature extraction...')
    features = []
    class_names = []
    file_names = []
    for i, d in enumerate(path_list):
        # f: feature matrix, fn: list of processed files
        f, _, fn = \
            directory_feature_extraction_no_avg(d, mid_window, mid_step,
                                                short_window, short_step)
if f.shape[0] > 0:
# if at least one audio file has been found in the provided folder:
features.append(f)
file_names.append(fn)
if d[-1] == os.sep:
class_names.append(d.split(os.sep)[-2])
else:
class_names.append(d.split(os.sep)[-1])
print('Extraction Done...')
return features, class_names, file_names
def directory_feature_extraction_no_avg(folder_path, mid_window, mid_step,
short_window, short_step):
"""
This function extracts the mid-term features of the WAVE
files of a particular folder without averaging each file.
ARGUMENTS:
- folder_path: the path of the WAVE directory
- mid_window, mid_step: mid-term window and step (in seconds)
- short_window, short_step: short-term window and step (in seconds)
RETURNS:
- X: A feature matrix
- Y: A matrix of file labels
- filenames:
"""
wav_file_list = []
signal_idx = np.array([])
mid_features = np.array([])
types = ('*.wav', '*.aif', '*.aiff', '*.ogg')
for files in types:
wav_file_list.extend(glob.glob(os.path.join(folder_path, files)))
wav_file_list = sorted(wav_file_list)
# wav_file_list contains all the wav files in the directory as a list.
for i, file_path in enumerate(wav_file_list):
sampling_rate, signal = audioBasicIO.read_audio_file(file_path)
if sampling_rate == 0:
continue
signal = audioBasicIO.stereo_to_mono(signal)
        mid_feature_vector, short_features, _ = \
mid_feature_extraction(signal, sampling_rate,
round(mid_window * sampling_rate),
round(mid_step * sampling_rate),
round(sampling_rate * short_window),
round(sampling_rate * short_step))
mid_feature_vector = np.transpose(mid_feature_vector)
print(mid_feature_vector.shape)
if len(mid_features) == 0: # append feature vector
mid_features = mid_feature_vector
signal_idx = np.zeros((mid_feature_vector.shape[0], ))
else:
mid_features = np.vstack((mid_features, mid_feature_vector))
signal_idx = np.append(signal_idx, i *
np.ones((mid_feature_vector.shape[0], )))
return mid_features, signal_idx, wav_file_list
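# Illustrative usage sketch (not part of the original module): the whole
# mid-term feature sequence is returned per file, with no averaging.
# "audioData/speech" is a placeholder directory.
#
#   X, idx, files = directory_feature_extraction_no_avg("audioData/speech",
#                                                       1.0, 1.0, 0.050, 0.050)
#   # X: (total_mid_term_windows x n_features); idx[k] gives the index in
#   # `files` of the file that produced row k of X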
def directory_feature_extraction_no_avg_3D(folder_path, mid_window, mid_step,
                                           short_window, short_step,
                                           window_size):
    """
    Modified from directory_feature_extraction_no_avg (by LXR): extracts the
    mid-term feature sequence of every audio file in a folder, pads/truncates
    each sequence to window_size frames and stacks them into a 3D array.
    """
wav_file_list = []
signal_idx = np.array([])
mid_features = np.array([])
features_in_3D_array = np.array([])
types = ('*.wav', '*.aif', '*.aiff', '*.ogg')
for files in types:
wav_file_list.extend(glob.glob(os.path.join(folder_path, files)))
wav_file_list = sorted(wav_file_list)
for i, file_path in enumerate(wav_file_list):
sampling_rate, signal = audioBasicIO.read_audio_file(file_path)
if sampling_rate == 0:
            print('Sampling rate is 0 - skipping ' + file_path)
continue
signal = audioBasicIO.stereo_to_mono(signal)
        mid_feature_vector, short_features, _ = \
mid_feature_extraction(signal, sampling_rate,
round(mid_window * sampling_rate),
round(mid_step * sampling_rate),
round(sampling_rate * short_window),
round(sampling_rate * short_step))
mid_feature_vector = np.transpose(mid_feature_vector)
        # Pad with zeros or truncate so that every file spans exactly
        # window_size mid-term frames (136 = 2 statistics x 68 short-term
        # features, the default ShortTermFeatures output)
        current_window_size = mid_feature_vector.shape[0]
        if current_window_size < window_size:
            pads = np.zeros((window_size - current_window_size, 136))
            mid_feature_vector = np.concatenate((mid_feature_vector, pads))
        elif current_window_size > window_size:
            print('Audio longer than window_size - truncating')
            mid_feature_vector = mid_feature_vector[0:window_size, :]
        # Concatenate all per-file feature sequences
        if len(mid_features) == 0:  # first feature matrix
            mid_features = mid_feature_vector
        else:
            mid_features = np.vstack((mid_features, mid_feature_vector))
    # Reshape the stacked matrix into a 3D array: (n_files, window_size, 136)
    features_in_3D_array = np.reshape(mid_features, (-1, window_size, 136))
    return features_in_3D_array, wav_file_list
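# Illustrative usage sketch (not part of the original module) for the 3D
# variant: every file is padded or truncated to window_size mid-term frames,
# e.g. for fixed-length sequence models. "audioData/speech" is a placeholder.
#
#   X3d, files = directory_feature_extraction_no_avg_3D("audioData/speech",
#                                                       1.0, 1.0, 0.050, 0.050,
#                                                       window_size=10)
#   # X3d.shape == (n_files, 10, 136)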
def multiple_directory_3Dfeature_extraction_no_avg(path_list, mid_window,
                                                   mid_step, short_window,
                                                   short_step, window_size=10):
    """
    Same as multiple_directory_feature_extraction_no_avg, but each directory
    yields a 3D feature array of shape (n_files, window_size, n_features)
    produced by directory_feature_extraction_no_avg_3D.
    """
    print('Start multiple directory non-averaged 3D feature extraction...')
    features = []
    class_names = []
    file_names = []
    for i, d in enumerate(path_list):
        # f: 3D feature array, fn: list of processed files
        f, fn = \
            directory_feature_extraction_no_avg_3D(d, mid_window, mid_step,
                                                   short_window, short_step,
                                                   window_size)
if f.shape[0] > 0:
# if at least one audio file has been found in the provided folder:
features.append(f)
file_names.append(fn)
if d[-1] == os.sep:
class_names.append(d.split(os.sep)[-2])
else:
class_names.append(d.split(os.sep)[-1])
print('extraction done for one directory')
print('Extraction Done...')
return features, class_names, file_names
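# Illustrative usage sketch (not part of the original module): one 3D array
# per class directory, e.g. to prepare per-class sequence datasets. The paths
# are placeholders.
#
#   feats, classes, files = multiple_directory_3Dfeature_extraction_no_avg(
#       ["audioData/speech", "audioData/noise"], 1.0, 1.0, 0.050, 0.050,
#       window_size=10)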
"""
The following two feature extraction wrappers extract features for given audio
files, however NO LONG-TERM AVERAGING is performed. Therefore, the output for
each audio file is NOT A SINGLE FEATURE VECTOR but a whole feature matrix.
Also, another difference between the following two wrappers and the previous
is that they NO LONG-TERM AVERAGING IS PERFORMED. In other words, the WAV
files in these functions are not used as uniform samples that need to be
averaged but as sequences
"""
def mid_feature_extraction_to_file(file_path, mid_window, mid_step,
short_window, short_step, output_file,
store_short_features=False, store_csv=False,
plot=False):
"""
This function is used as a wrapper to:
a) read the content of a WAV file
b) perform mid-term feature extraction on that signal
c) write the mid-term feature sequences to a np file
"""
sampling_rate, signal = audioBasicIO.read_audio_file(file_path)
signal = audioBasicIO.stereo_to_mono(signal)
if store_short_features:
mid_features, short_features, _ = \
mid_feature_extraction(signal, sampling_rate,
round(sampling_rate * mid_window),
round(sampling_rate * mid_step),
round(sampling_rate * short_window),
                                   round(sampling_rate * short_step))
# save st features to np file
np.save(output_file + "_st", short_features)
if plot:
print("Short-term np file: " + output_file + "_st.npy saved")
if store_csv:
# store st features to CSV file
np.savetxt(output_file + "_st.csv", short_features.T, delimiter=",")
if plot:
print("Short-term CSV file: " + output_file + "_st.csv saved")
else:
mid_features, _, _ = \
mid_feature_extraction(signal, sampling_rate,
round(sampling_rate * mid_window),
round(sampling_rate * mid_step),
round(sampling_rate * short_window),
round(sampling_rate * short_step))
# save mt features to np file
np.save(output_file, mid_features)
if plot:
print("Mid-term np file: " + output_file + ".npy saved")
if store_csv:
np.savetxt(output_file + ".csv", mid_features.T, delimiter=",")
if plot:
print("Mid-term CSV file: " + output_file + ".csv saved")
def mid_feature_extraction_file_dir(folder_path, mid_window, mid_step,
                                    short_window, short_step,
                                    store_short_features=False,
                                    store_csv=False, plot=False):
    """
    Applies mid_feature_extraction_to_file() to every WAV file of a folder,
    storing the feature files next to the corresponding audio files.
    """
    types = (folder_path + os.sep + '*.wav',)
files_list = []
for t in types:
files_list.extend(glob.glob(t))
for f in files_list:
output_path = f
mid_feature_extraction_to_file(f, mid_window, mid_step, short_window,
short_step, output_path,
store_short_features, store_csv, plot)