Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated RMS, ZCR features #21

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 67 additions & 59 deletions music_feats/features/extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import division
import math
import librosa
# import librosa
import numpy as np
import scipy as sp
from music_feats.features.util.utils import *
Expand All @@ -20,27 +20,28 @@
'fluctuationCentroid',
'MPS']

def rms(y, sr=44100, win_length=0.05, hop_length=None, pad=None,
        decomposition=True):
    '''
    Calculate root-mean-square energy from a time-series signal.

    :usage:
        >>> # Load a file
        >>> y, sr = librosa.load('file.mp3')
        >>> # Calculate the framewise RMS of a time-series
        >>> rms = extractor.rms(y, sr=sr, win_length=0.05,
        ...                     hop_length=None, decomposition=True)

    :parameters:
        - y : np.ndarray [shape=(n,)] or sequence of numbers. Time series
            to calculate the RMS of. Converted to float internally so that
            integer PCM data does not overflow when squared.
        - sr : integer. Sampling rate of the audio file. Only used to
            convert the time arguments below into sample counts.
        - win_length : float. The frame length (in s). Default 50 ms.
        - hop_length : float. The hop between consecutive frames (in s).
            Default is half the window length.
        - pad : float. Padding duration (in s). It is converted to a
            whole number of samples and forwarded to framewise().
        - decomposition : boolean. Whether or not to do a framewise
            analysis of the time series. Pass a real boolean: the string
            'False' is truthy.

    :returns:
        If decomposition is False:
            - The root-mean-square of the whole signal (a float).
        If decomposition is True:
            - The result of framewise() applied with this function, i.e.
              the root-mean-square of the signal per frame.

    :raises:
        - AssertionError if y is empty, or if win_length, hop_length or
          pad is given and is not positive.
    '''
    assert len(y) > 0, 'audio file must not be empty'
    assert win_length > 0, 'window length must be positive'
    # Compare against None explicitly: `not hop_length` would let a hop of 0
    # slip through validation and reach framewise() as a zero step.
    assert hop_length is None or hop_length > 0, 'hop length must be positive'
    assert (not pad) or pad > 0, 'pad amount must be positive'

    if decomposition:
        if hop_length is None:
            hop_length = win_length / 2
        # Seconds -> whole samples.
        win_length, hop_length = int(win_length * sr), int(hop_length * sr)
        if pad:
            # Cast like the window/hop so framewise() gets an integer count.
            pad = int(pad * sr)
        return framewise(rms, y, win_length, hop_length,
                         pad=pad, decomposition=False)

    samples = np.asarray(y, dtype=float)
    return np.sqrt(np.sum(samples ** 2) / len(samples))


def zcr(y, sr=44100, p='second', d='one', win_length=0.05, hop_length=None,
        pad=None, decomposition=True):
    '''
    Calculate the zero-crossing rate from a time-series signal.

    :usage:
        >>> # Load a file
        >>> y, sr = librosa.load('file.mp3')
        >>> # Calculate the framewise zero-crossing rate of the signal
        >>> zcr = extractor.zcr(y, sr=sr, p='second', d='one',
        ...                     win_length=0.05, hop_length=None,
        ...                     decomposition=True)

    :parameters:
        - y : np.ndarray [shape=(n,)] or sequence of numbers. Time series
            to calculate the zero-crossing rate of.
        - sr : integer. Sampling rate of the audio file.
        - p : Number of zero crossings either per 'second' or per 'sample'.
            Default: 'second'. Any value other than 'second' leaves the
            rate per sample.
        - d : Number of zero crossings in one direction only using d='one'
            (default, the count is halved) or in both directions using
            d='both'.
        - win_length : float. The frame length (in s). Default 50 ms.
        - hop_length : float. The hop between consecutive frames (in s).
            Default is half the window length.
        - pad : float. Padding amount, forwarded unchanged to framewise().
        - decomposition : boolean. Whether or not to do a framewise
            analysis of the time series. Pass a real boolean: the string
            'False' is truthy.

    :returns:
        If decomposition is False:
            - The zero-crossing rate of the whole signal (a float).
        If decomposition is True:
            - The result of framewise() applied with this function, i.e.
              the zero-crossing rate of the signal per frame.

    :raises:
        - AssertionError if y is empty, or if win_length, hop_length or
          pad is given and is not positive.
    '''
    assert len(y) > 0, 'audio file must not be empty'
    assert win_length > 0, 'window length must be positive'
    # Compare against None explicitly so a hop of 0 is rejected.
    assert hop_length is None or hop_length > 0, 'hop length must be positive'
    assert (not pad) or pad > 0, 'pad amount must be positive'

    if decomposition:
        if hop_length is None:
            hop_length = win_length / 2
        # Seconds -> whole samples.
        win_length, hop_length = int(win_length * sr), int(hop_length * sr)
        # NOTE(review): rms() converts pad from seconds to samples before
        # calling framewise(); here it is passed through as given. Confirm
        # which unit framewise() expects.
        # sr must be forwarded: otherwise every per-frame call falls back to
        # the default 44100 and p='second' rates are mis-scaled.
        return framewise(zcr, y, win_length, hop_length,
                         pad=pad, sr=sr, p=p, d=d, decomposition=False)

    # Float conversion keeps the pairwise product from overflowing on
    # integer PCM data (which could flip its sign).
    samples = np.asarray(y, dtype=float)
    # A sign change between neighbours makes their product negative.
    crossings = np.count_nonzero(samples[1:] * samples[:-1] < 0)
    zcrate = crossings / len(samples)
    if p == 'second':
        zcrate *= sr
    if d == 'one':
        zcrate /= 2
    return zcrate


def spectralCentroid(y, sr=44100, win_length=0.05, hop_length=None,
                     pad=None, decomposition=True):
    '''
    Calculate the spectral centroid (mean) of a time-series signal. Commonly
    used as the brightness of a sound.

    :usage:
        >>> # Load a file
        >>> y, sr = librosa.load('file.mp3')
        >>> # Calculate the framewise spectral centroid of a time-series
        >>> spectralCentroid = extractor.spectralCentroid(y,
        ...                         sr=sr, win_length=0.05, hop_length=None,
        ...                         decomposition=True)

    :parameters:
        - y : A numpy array [shape=(n,)] of time series to calculate the
            spectral centroid of.
        - sr : Sampling rate of the audio file. (Default = 44100)
        - win_length : float. The frame length (in s). Default 50 ms.
        - hop_length : float. The hop between consecutive frames (in s).
            Default is half the window length.
        - pad : float. Padding amount, forwarded unchanged to framewise().
        - decomposition : boolean. Whether or not to do a framewise
            analysis of the time-series.

    :returns:
        If decomposition is False:
            - The amplitude-weighted mean of the frequencies returned by
              spectrogram(y, sr) for the whole signal.
        If decomposition is True:
            - The result of framewise() applied with this function, i.e.
              the spectral centroid of the signal per frame.

    :raises:
        - AssertionError if y is empty, or if win_length, hop_length or
          pad is given and is not positive.

    :citation:
        - Beauchamp J. W., Synthesis by Spectral Amplitude and
          'Brightness' Matching of Analyzed Musical Instrument Tones
    '''
    assert len(y) > 0, 'audio file must not be empty'
    assert win_length > 0, 'window length must be positive'
    # Compare against None explicitly so a hop of 0 is rejected.
    assert hop_length is None or hop_length > 0, 'hop length must be positive'
    assert (not pad) or pad > 0, 'pad amount must be positive'

    if decomposition:
        if hop_length is None:
            hop_length = win_length / 2
        # Seconds -> whole samples.
        win_length, hop_length = int(win_length * sr), int(hop_length * sr)
        # NOTE(review): rms() converts pad from seconds to samples before
        # calling framewise(); here it is passed through as given. Confirm
        # which unit framewise() expects.
        # sr must be forwarded: otherwise every per-frame call computes its
        # frequency axis with the default 44100.
        return framewise(spectralCentroid, y, win_length, hop_length,
                         pad=pad, sr=sr, decomposition=False)

    freqs, ampls = spectrogram(y, sr)
    # NOTE(review): an all-zero amplitude spectrum (silence) makes this a
    # 0/0 division; decide whether callers want nan or a defined value.
    return np.sum(ampls * freqs) / np.sum(ampls)

def spectralSpread(y, sr=44100, n_fft=2048, hop_length=None,
toWin=True, pad=None, decomposition=True):
Expand All @@ -182,7 +190,7 @@ def spectralSpread(y, sr=44100, n_fft=2048, hop_length=None,
:parameters:
- y : A numpy array [shape=(n,)] of time series to calculate the
spectral spread of.
- sr : Sampling rate of the audio file. (Default = 22050)
- sr : Sampling rate of the audio file. (Default = 44100)
- win_length : integer. The frame length of the music time series
(in s) to be considered. Default 50 ms.
- hop_length : integer. The amount of overlap between the frames
Expand All @@ -207,16 +215,16 @@ def spectralSpread(y, sr=44100, n_fft=2048, hop_length=None,
else:
# Calculate the spectrum
Y = np.fft.fft(y)
magns = np.abs(Y[:np.int(np.ceil(len(Y)/2))])
magns = np.abs(Y[:np.ceil(len(Y))/2])
# Calculate the frequency bin values
freqs = np.fft.fftfreq(len(Y), 1/sr)
# freqs = np.linspace(0, 1, len(Y)) * sr
# Calculate SC
SC = np.dot(freqs[:np.int(np.ceil(len(Y)/2))], magns) / np.sum(magns)
SC = np.dot(freqs[:np.ceil(len(Y)/2)], magns) / np.sum(magns)
scs = np.ones(len(Y))*SC
# Calculate the squared deviation from the spectral centroid
spread = (freqs - scs)**2
return np.sqrt(np.dot(spread[:np.int(np.ceil(len(Y)/2))], magns) / np.sum(magns))
return np.sqrt(np.dot(spread[:len(Y)/2], magns) / np.sum(magns))
# bins_var = np.linspace(0,1,len(Y)) * sr - np.ones(len(Y)) * SC
# bins_var = bins_var ** 2
# temp = np.dot(bins_var[:len(Y)/2], abs(Y[:len(Y)/2]))
Expand Down Expand Up @@ -262,7 +270,7 @@ def spectralFlatness(y, sr=44100, n_fft=2048, hop_length=None,
decomposition=False) # win_length
else:
Y = np.fft.fft(y)
Y_abs = abs(Y[:np.int(len(Y)/2)])
Y_abs = abs(Y[:len(Y)/2])
return sp.stats.mstats.gmean(Y_abs) / np.mean(Y_abs)

def CQT(y, sr=44100, cqt_hop=1024, seconds=2.0, n_bins=30, bins_per_octave=4, fmin=27.5,
Expand Down
Loading