machine-shop · alvinzz · Nov 10, 2016 · Nov 15, 2016 · Nov 15, 2016 · Nov 15, 2016
diff --git a/music_feats/features/extractor.py b/music_feats/features/extractor.py
@@ -1,6 +1,6 @@
 from __future__ import division
 import math
-import librosa
+# import librosa
 import numpy as np
 import scipy as sp
 from music_feats.features.util.utils import *
@@ -20,27 +20,28 @@
            'fluctuationCentroid',
 		   'MPS']
 
-def rms(y, sr=44100, win_length=0.05, hop_length=None,
-    pad=None, decomposition=True):
+def rms(y, sr=44100, win_length=0.05, hop_length=None, pad=None,
+    decomposition=True):
     '''
     Calculate root-mean-square energy from a time-series signal
         :usage:
                 >>> # Load a file
                 >>> y, sr = librosa.load('file.mp3')
                 >>> # Calculate the RMS of a time-series
-                >>> rms = extractor.rms(y, sr=sr,
-                        win_length=None, hop_length=512, decomposition='True')
+                >>> rms = extractor.rms(y, sr=sr,
+                        win_length=0.05, hop_length=None, decomposition=True)
 
         :parameters:
             - y : np.ndarray [shape=(n,)]. Time series to calculate the RMS of.
             - sr : integer. sampling rate of the audio file
-            - win_length : integer. The frame length of the music time series
-                           (in s) to be considered.  Default 50 ms.
-            - hop_length : integer. The amount of overlap between the frames
-                           (in s).  Default is half the window length.
-            - pad : integer. Amount which to pad by before frame decomposition.
+            - win_length : float. The frame length of the music time series
+                (in s) to be considered.  Default 50 ms.
+            - hop_length : float. The amount of overlap between the frames
+                (in s).  Default is half the window length.
+            - pad: float. The time in seconds that the signal is to be padded
+                by. The start and end will be padded equally by a reflection.
             - decomposition : boolean. Whether or not to do a framewise
-                              analysis of the time series
+                analysis of the time series
 
         :returns:
             If decomposition = 'False':
@@ -50,18 +51,24 @@ def rms(y, sr=44100, win_length=0.05, hop_length=None,
             - A numpy array representing the root-mean-square of the
               time-series of the signal per frame.
     '''
+    assert len(y)>0, 'audio file must not be empty'
+    assert win_length>0 , 'window length must be positive'
+    assert (not hop_length) or hop_length>0, 'hop length must be positive'
+    assert (not pad) or pad>0, 'pad amount must be positive'
     if decomposition:
         if hop_length is None:
             hop_length = win_length/2
         win_length, hop_length = int(win_length*sr), int(hop_length*sr)
+        if pad:
+            pad *= sr
         return framewise(rms, y, win_length, hop_length,
-            padAmt=pad, decomposition=False)
+            pad=pad, decomposition=False)
     else:
         return np.sqrt(np.sum(y**2)/len(y))
 
 
-def zcr(y, sr=44100, p='second', d='one', n_fft=2048, hop_length=None,
-        pad=None, decomposition=True): # win_length=0.05
+def zcr(y, sr=44100, p='second', d='one', win_length=0.05, hop_length=None,
+    pad=None, decomposition=True):
     '''
     Calculate the zero-crossing rate from a time-series signal.
 
@@ -70,7 +77,7 @@ def zcr(y, sr=44100, p='second', d='one', n_fft=2048, hop_length=None,
                 >>> y, sr = librosa.load('file.mp3')
                 >>> # Calculate a zero-crossing rate of the time-series of a
                 >>> # signal
-                >>> zcr = extractor.zcr(y, sr=sr, p='second', d='one',
+                >>> zcr = extractor.zcr(y, sr=sr, p='second', d='one',
                                         win_length=0.05, hop_length=None,
                                         decomposition='True')
 
@@ -80,14 +87,16 @@ def zcr(y, sr=44100, p='second', d='one', n_fft=2048, hop_length=None,
             - p : Number of zero crossings either per 'second' or per 'sample'.
                   Default: 'second'.
             - d : Number of zero crossings from negative to positive (or
-                  equivalently positive to negative) only using d='one'
-                  (default) or both directions using d='both'.
-            - win_length : integer. The frame length of the music time series
-                   (in s) to be considered.  Default 50 ms.
-            - hop_length : integer. The amount of overlap between the frames
-                    (in samples).  Default is half the window length.
+                equivalently positive to negative) only using d='one'
+                (default) or both directions using d='both'.
+            - win_length : float. The frame length of the music time series
+                (in s) to be considered.  Default 50 ms.
+            - hop_length : float. The amount of overlap between the frames
+                (in s).  Default is half the window length.
+            - pad: float. The time in seconds that the signal is to be padded
+                by. The start and end will be padded equally by a reflection.
             - decomposition : boolean. Whether or not to do a framewise
-                    analysis of the time series
+                analysis of the time series
 
         :returns:
             If decomposition = 'False':
@@ -96,49 +105,51 @@ def zcr(y, sr=44100, p='second', d='one', n_fft=2048, hop_length=None,
             - A numpy array representing the zcr of the time-series of the
               signal per frame.
     '''
+    assert len(y)>0, 'audio file must not be empty'
+    assert win_length>0 , 'window length must be positive'
+    assert (not hop_length) or hop_length>0, 'hop length must be positive'
+    assert (not pad) or pad>0, 'pad amount must be positive'
     if decomposition:
-        # win_length = sr * win_length
         if hop_length is None:
-            # hop_length = int(win_length / 2)
-            hop_length = int(n_fft / 2)
-        return framewise(zcr, y, n_fft, hop_length, padAmt=pad,
-                         sr=sr, p=p, d=d, decomposition=False) # win_length
+            hop_length = win_length/2
+        win_length, hop_length = int(win_length*sr), int(hop_length*sr)
+        return framewise(zcr, y, win_length, hop_length,
+            pad=pad, p=p, d=d, decomposition=False)
     else:
         zcrate = y[1:] * y[:len(y)-1]
         # All zero crossings can be identified with a negative number
-        # zcrate = sum(zcrate < 0) / len(y)
-        zcrate = len(np.where(zcrate < 0)[0]) / len(y) # np.where() speed boost
+        zcrate = len(np.where(zcrate < 0)[0]) / len(y)
         if p == 'second':
-            zcrate = zcrate * sr
+            zcrate *= sr
         if d == 'one':
-            zcrate = zcrate / 2
+            zcrate /= 2
         return zcrate
 
 
-def spectralCentroid(y, sr=44100, n_fft=2048, hop_length=None,
-                     toWin=True, pad=None, decomposition=True): #win_length=0.05
+def spectralCentroid(y, sr=44100, win_length=0.05, hop_length=None,
+    pad=None, decomposition=True):
     '''
     Calculate the spectral centroid (mean) of a time-series signal. Commonly
     used as the brightness of a sound.
-
         :usage:
                 >>> # Load a file
                 >>> y, sr = librosa.load('file.mp3')
                 >>> # Calculate the spectral centroid of a time-series
                 >>> spectralCentroid = extractor.spectralCentroid(y,
                     sr=sr, win_length=0.05, hop_length=None,
                     decomposition=True)
-
         :parameters:
             - y : A numpy array [shape=(n,)] of time series to calculate the
                   spectral centroid of.
-            - sr : Sampling rate of the audio file. (Default = 22050)
-            - win_length : integer. The frame length of the music time series
-              (in s) to be considered.  Default 50 ms.
-            - hop_length : integer. The amount of overlap between the frames
-              (in samples).  Default is half the window length.
+            - sr : Sampling rate of the audio file. (Default = 44100)
+            - win_length : float. The frame length of the music time series
+                  (in s) to be considered.  Default 50 ms.
+            - hop_length : float. The amount of overlap between the frames
+                  (in s).  Default is half the window length.
+            - pad: float. The time in seconds that the signal is to be padded
+                by. The start and end will be padded equally by a reflection.
             - decomposition: boolean. Whether or not to do a framewise
-              analysis of the time-series.
+                  analysis of the time-series.
 
         :returns:
             If decomposition=False:
@@ -151,22 +162,19 @@ def spectralCentroid(y, sr=44100, n_fft=2048, hop_length=None,
             - Beauchamp J. W., Synthesis by Spectral Amplitude and
             'Brightness' Matching of Analyzed Musical Instrument Tones
     '''
+    assert len(y)>0, 'audio file must not be empty'
+    assert win_length>0 , 'window length must be positive'
+    assert (not hop_length) or hop_length>0, 'hop length must be positive'
+    assert (not pad) or pad>0, 'pad amount must be positive'
     if decomposition:
-        # win_length = sr * win_length
         if hop_length is None:
-            hop_length = int(n_fft/2)
-            # hop_length = int(win_length/2)
-        return framewise(spectralCentroid, y, n_fft, hop_length,
-                         toWin=toWin, padAmt=pad, sr=sr,
-                         decomposition=False) #win_length
+            hop_length = win_length/2
+        win_length, hop_length = int(win_length*sr), int(hop_length*sr)
+        return framewise(spectralCentroid, y, win_length, hop_length,
+            pad=pad, decomposition=False)
     else:
-        Y = np.fft.fft(y)
-        magns = np.abs(Y[:np.int(np.ceil(len(Y)/2))])
-        # Calculate the frequency bin values
-        freqs = np.fft.fftfreq(len(Y), 1/sr)[:np.int(np.ceil(len(Y)/2))]
-        # freqs = np.linspace(0, 1, len(Y))[:len(Y)/2] * sr
-        return np.dot(freqs, magns) / np.sum(magns)
-
+        freqs, ampls = spectrogram(y, sr)
+        return np.sum(ampls * freqs)/np.sum(ampls)
 
 def spectralSpread(y, sr=44100, n_fft=2048, hop_length=None,
                    toWin=True, pad=None, decomposition=True):
@@ -182,7 +190,7 @@ def spectralSpread(y, sr=44100, n_fft=2048, hop_length=None,
         :parameters:
             - y : A numpy array [shape=(n,)] of time series to calculate the
               spectral spread of.
-            - sr : Sampling rate of the audio file. (Default = 22050)
+            - sr : Sampling rate of the audio file. (Default = 44100)
             - win_length : integer. The frame length of the music time series
               (in s) to be considered.  Default 50 ms.
             - hop_length : integer. The amount of overlap between the frames
@@ -207,16 +215,16 @@ def spectralSpread(y, sr=44100, n_fft=2048, hop_length=None,
     else:
         # Calculate the spectrum
         Y = np.fft.fft(y)
-        magns = np.abs(Y[:np.int(np.ceil(len(Y)/2))])
+        magns = np.abs(Y[:np.ceil(len(Y))/2])
         # Calculate the frequency bin values
         freqs = np.fft.fftfreq(len(Y), 1/sr)
         # freqs = np.linspace(0, 1, len(Y)) * sr
         # Calculate SC
-        SC = np.dot(freqs[:np.int(np.ceil(len(Y)/2))], magns) / np.sum(magns)
+        SC = np.dot(freqs[:np.ceil(len(Y)/2)], magns) / np.sum(magns)
         scs = np.ones(len(Y))*SC
         # Calculate the squared deviation from the spectral centroid
         spread = (freqs - scs)**2
-        return np.sqrt(np.dot(spread[:np.int(np.ceil(len(Y)/2))], magns) / np.sum(magns))
+        return np.sqrt(np.dot(spread[:len(Y)/2], magns) / np.sum(magns))
         # bins_var = np.linspace(0,1,len(Y)) * sr - np.ones(len(Y)) * SC
         # bins_var = bins_var ** 2
         # temp = np.dot(bins_var[:len(Y)/2], abs(Y[:len(Y)/2]))
@@ -262,7 +270,7 @@ def spectralFlatness(y, sr=44100, n_fft=2048, hop_length=None,
                          decomposition=False) # win_length
     else:
         Y = np.fft.fft(y)
-        Y_abs = abs(Y[:np.int(len(Y)/2)])
+        Y_abs = abs(Y[:len(Y)/2])
         return sp.stats.mstats.gmean(Y_abs) / np.mean(Y_abs)
 
 def CQT(y, sr=44100, cqt_hop=1024, seconds=2.0, n_bins=30, bins_per_octave=4, fmin=27.5,