# forked from x4nth055/gender-recognition-by-voice
# preparation.py
import glob
import os
import pandas as pd
import numpy as np
import shutil
import librosa
from tqdm import tqdm
def extract_feature(file_name, **kwargs):
    """
    Extract features from audio file `file_name` and return them
    concatenated into a single 1-D numpy array.
    Features supported (keyword flags, all off by default):
        - MFCC (mfcc): 40 Mel-frequency cepstral coefficients
        - Chroma (chroma): chromagram computed from the STFT
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast): spectral contrast
        - Tonnetz (tonnetz): tonal centroid features of the harmonic signal
    Features are appended in the order listed above; each one is averaged
    over time (mean across frames).
    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    # librosa resamples to its default sr (22050 Hz) and mixes to mono
    X, sample_rate = librosa.core.load(file_name)
    # the STFT is only needed by the chroma and contrast features
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # FIX: pass the signal via the `y=` keyword — positional audio
        # arguments were deprecated in librosa 0.8 and removed in 0.10,
        # so `melspectrogram(X, ...)` raises TypeError on current librosa.
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result
dirname = "data"
# FIX: makedirs(..., exist_ok=True) avoids the isdir/mkdir race and
# works even if the path is nested
os.makedirs(dirname, exist_ok=True)
csv_files = glob.glob("*.csv")
for j, csv_file in enumerate(csv_files):
    print("[+] Preprocessing", csv_file)
    df = pd.read_csv(csv_file)
    # only take filename and gender columns
    new_df = df[["filename", "gender"]]
    print("Previously:", len(new_df), "rows")
    # take only male & female genders (i.e dropping NaNs & 'other' gender);
    # Series.isin is the idiomatic pandas form of the or-of-equalities test
    new_df = new_df[new_df["gender"].isin(("female", "male"))]
    print("Now:", len(new_df), "rows")
    new_csv_file = os.path.join(dirname, csv_file)
    # save new preprocessed CSV
    new_df.to_csv(new_csv_file, index=False)
    # get the folder name (the CSV file name without its extension);
    # FIX: os.path.splitext is safe when the name contains extra dots,
    # where csv_file.split(".") would raise "too many values to unpack"
    folder_name, _ = os.path.splitext(csv_file)
    audio_files = glob.glob(f"{folder_name}/{folder_name}/*")
    # membership set built once so the per-file lookup below is O(1)
    all_audio_filenames = set(new_df["filename"])
    for i, audio_file in tqdm(list(enumerate(audio_files)), f"Extracting features of {folder_name}"):
        splited = os.path.split(audio_file)
        # rebuild "<subfolder>/<basename>" with forward slashes to match
        # the CSV's filename column regardless of the host OS separator
        audio_filename = f"{os.path.split(splited[0])[-1]}/{splited[-1]}"
        if audio_filename in all_audio_filenames:
            src_path = f"{folder_name}/{audio_filename}"
            target_path = f"{dirname}/{audio_filename}"
            # create that folder if it doesn't exist
            os.makedirs(os.path.dirname(target_path), exist_ok=True)
            # extract the mel-spectrogram feature vector and save it as
            # a .npy file alongside the intended target path
            features = extract_feature(src_path, mel=True)
            # FIX: strip only the real extension (splitext), not everything
            # after the first dot in the whole path
            target_filename = os.path.splitext(target_path)[0]
            np.save(target_filename, features)
            # shutil.copyfile(src_path, target_path)