-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCreateDataset.py
121 lines (102 loc) · 4.7 KB
/
CreateDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import librosa
import numpy
import pandas
import os
import sklearn
import config
def main():
samp_rate = config.CreateDataset.SAMPLING_RATE
frame_size = config.CreateDataset.FRAME_SIZE
hop_size = config.CreateDataset.HOP_SIZE
dataset_dir = config.CreateDataset.DATASET_DIRECTORY
sub_folders = get_subdirectories(dataset_dir)
labels = []
is_created = False
print("Extracting features from audios...")
for sub_folder in sub_folders:
print(".....Working in folder:", sub_folder)
sample_arrays = get_sample_arrays(dataset_dir, sub_folder, samp_rate)
for sample_array in sample_arrays:
row = extract_features(sample_array, samp_rate, frame_size, hop_size)
if not is_created:
dataset_numpy = numpy.array(row)
is_created = True
elif is_created:
dataset_numpy = numpy.vstack((dataset_numpy, row))
labels.append(sub_folder)
print("Normalizing the data...")
scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1))
dataset_numpy = scaler.fit_transform(dataset_numpy)
Feature_Names = ['meanZCR', 'stdZCR', 'meanSpecCentroid', 'stdSpecCentroid', 'meanSpecContrast', 'stdSpecContrast',
'meanSpecBandwidth', 'stdSpecBandwidth', 'meanSpecRollof', 'stdSpecRollof',
'meanMFCC_1', 'stdMFCC_1', 'meanMFCC_2', 'stdMFCC_2', 'meanMFCC_3', 'stdMFCC_3',
'meanMFCC_4', 'stdMFCC_4', 'meanMFCC_5', 'stdMFCC_5', 'meanMFCC_6', 'stdMFCC_6',
'meanMFCC_7', 'stdMFCC_7', 'meanMFCC_8', 'stdMFCC_8', 'meanMFCC_9', 'stdMFCC_9',
'meanMFCC_10', 'stdMFCC_10', 'meanMFCC_11', 'stdMFCC_11', 'meanMFCC_12', 'stdMFCC_12',
'meanMFCC_13', 'stdMFCC_13'
]
dataset_pandas = pandas.DataFrame(dataset_numpy, columns=Feature_Names)
dataset_pandas["genre"] = labels
dataset_pandas.to_csv("data_set.csv", index=False)
print("Data set has been created and sent to the project folder!")
def get_subdirectories(a_dir):
return [name for name in os.listdir(a_dir)
if os.path.isdir(os.path.join(a_dir, name))]
def get_sample_arrays(dataset_dir, folder_name, samp_rate):
path_of_audios = librosa.util.find_files(dataset_dir + "/" + folder_name)
audios = []
for audio in path_of_audios:
x, sr = librosa.load(audio, sr=samp_rate, duration=5.0)
audios.append(x)
audios_numpy = numpy.array(audios)
return audios_numpy
def extract_features(signal, sample_rate, frame_size, hop_size):
zero_crossing_rate = librosa.feature.zero_crossing_rate(y=signal, frame_length=frame_size, hop_length=hop_size)
spectral_centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate, n_fft=frame_size,
hop_length=hop_size)
spectral_contrast = librosa.feature.spectral_contrast(y=signal, sr=sample_rate, n_fft=frame_size,
hop_length=hop_size)
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=sample_rate, n_fft=frame_size,
hop_length=hop_size)
spectral_rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sample_rate, n_fft=frame_size, hop_length=hop_size)
mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_fft=frame_size, hop_length=hop_size)
return [
numpy.mean(zero_crossing_rate),
numpy.std(zero_crossing_rate),
numpy.mean(spectral_centroid),
numpy.std(spectral_centroid),
numpy.mean(spectral_contrast),
numpy.std(spectral_contrast),
numpy.mean(spectral_bandwidth),
numpy.std(spectral_bandwidth),
numpy.mean(spectral_rolloff),
numpy.std(spectral_rolloff),
numpy.mean(mfccs[1, :]),
numpy.std(mfccs[1, :]),
numpy.mean(mfccs[2, :]),
numpy.std(mfccs[2, :]),
numpy.mean(mfccs[3, :]),
numpy.std(mfccs[3, :]),
numpy.mean(mfccs[4, :]),
numpy.std(mfccs[4, :]),
numpy.mean(mfccs[5, :]),
numpy.std(mfccs[5, :]),
numpy.mean(mfccs[6, :]),
numpy.std(mfccs[6, :]),
numpy.mean(mfccs[7, :]),
numpy.std(mfccs[7, :]),
numpy.mean(mfccs[8, :]),
numpy.std(mfccs[8, :]),
numpy.mean(mfccs[9, :]),
numpy.std(mfccs[9, :]),
numpy.mean(mfccs[10, :]),
numpy.std(mfccs[10, :]),
numpy.mean(mfccs[11, :]),
numpy.std(mfccs[11, :]),
numpy.mean(mfccs[12, :]),
numpy.std(mfccs[12, :]),
numpy.mean(mfccs[13, :]),
numpy.std(mfccs[13, :]),
]
if __name__ == '__main__':
main()