From 1b7f9480b3da1d2c9a8a1e89fdaeae7f3d1abebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Thu, 1 Jun 2017 11:27:46 +0200 Subject: [PATCH 01/18] fix float array indexing --- madmom/features/tempo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 1e60ca9a1..00fa7ee63 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -215,7 +215,7 @@ def detect_tempo(histogram, fps): if len(peaks) == 0: # a flat histogram has no peaks, use the center bin if len(bins): - ret = np.asarray([tempi[len(bins) / 2], 1.]) + ret = np.asarray([tempi[len(bins) // 2], 1.]) else: # otherwise: no peaks, no tempo ret = np.asarray([NO_TEMPO, 0.]) From b53f8d40ec0751f37dee3f8398e130eefc70f3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Wed, 30 Aug 2017 13:15:54 +0200 Subject: [PATCH 02/18] refactor histogram method of TempoEstimationProcessor to use a dedicated histogram processor --- bin/TempoDetector | 5 +- madmom/features/beats.py | 27 ++-- madmom/features/tempo.py | 295 +++++++++++++++++++++++++++++------ tests/test_features_tempo.py | 13 +- 4 files changed, 274 insertions(+), 66 deletions(-) diff --git a/bin/TempoDetector b/bin/TempoDetector index 3ae90dbc5..3de26d943 100755 --- a/bin/TempoDetector +++ b/bin/TempoDetector @@ -55,7 +55,10 @@ def main(): # signal processing arguments SignalProcessor.add_arguments(p, norm=False, gain=0) # tempo arguments - TempoEstimationProcessor.add_arguments(p) + TempoEstimationProcessor.add_arguments(p, method='comb', min_bpm=40., + max_bpm=250., act_smooth=0.14, + hist_smooth=9, hist_buffer=10., + alpha=0.79) # mirex stuff g = p.add_mutually_exclusive_group() g.add_argument('--mirex', dest='tempo_format', diff --git a/madmom/features/beats.py b/madmom/features/beats.py index e7b7dcf38..bba8bf198 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -404,8 +404,15 @@ class 
BeatTrackingProcessor(Processor): look_ahead : float, optional Look `look_ahead` seconds in both directions to determine the local tempo and align the beats accordingly. + tempo_estimator : :class:`TempoEstimationProcessor`, optional + Use this processor to estimate the (local) tempo. If 'None' a default + tempo estimator will be created and used. fps : float, optional Frames per second. + kwargs : dict, optional + Keyword arguments passed to + :class:`madmom.features.tempo.TempoEstimationProcessor` if no + `tempo_estimator` was given. Notes ----- @@ -449,25 +456,21 @@ class BeatTrackingProcessor(Processor): """ LOOK_ASIDE = 0.2 - LOOK_AHEAD = 10 - # tempo defaults - TEMPO_METHOD = 'comb' - MIN_BPM = 40 - MAX_BPM = 240 - ACT_SMOOTH = 0.09 - HIST_SMOOTH = 7 - ALPHA = 0.79 + LOOK_AHEAD = 10. def __init__(self, look_aside=LOOK_ASIDE, look_ahead=LOOK_AHEAD, fps=None, - **kwargs): - # import the TempoEstimation here otherwise we have a loop - from .tempo import TempoEstimationProcessor + tempo_estimator=None, **kwargs): # save variables self.look_aside = look_aside self.look_ahead = look_ahead self.fps = fps # tempo estimator - self.tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs) + if tempo_estimator is None: + # import the TempoEstimation here otherwise we have a loop + from .tempo import TempoEstimationProcessor + # create default tempo estimator + tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs) + self.tempo_estimator = tempo_estimator def process(self, activations, **kwargs): """ diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 00fa7ee63..29733fc79 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -14,6 +14,12 @@ from madmom.processors import Processor from madmom.audio.signal import smooth as smooth_signal +METHOD = 'comb' +ALPHA = 0.79 +MIN_BPM = 40. +MAX_BPM = 250. 
+ACT_SMOOTH = 0.14 +HIST_SMOOTH = 9 NO_TEMPO = np.nan @@ -234,7 +240,198 @@ def detect_tempo(histogram, fps): return np.atleast_2d(ret) -# tempo estimation processor class +# tempo histogram processor classes +class TempoHistogramProcessor(Processor): + """ + Tempo Histogram Processor class. + + Parameters + ---------- + min_bpm : float + Minimum tempo to detect [bpm]. + max_bpm : float + Maximum tempo to detect [bpm]. + fps : float, optional + Frames per second. + + Notes + ----- + This abstract class provides the basic tempo histogram functionality. + Please use one of the following implementations: + + - :class:`CombFilterTempoHistogramProcessor`, + - :class:`ACFTempoHistogramProcessor` or + - :class:`DBNTempoHistogramProcessor`. + + """ + + def __init__(self, min_bpm, max_bpm, fps=None, **kwargs): + # pylint: disable=unused-argument + self.min_bpm = min_bpm + self.max_bpm = max_bpm + self.fps = fps + + @property + def min_interval(self): + """Minimum beat interval [frames].""" + return int(np.floor(60. * self.fps / self.max_bpm)) + + @property + def max_interval(self): + """Maximum beat interval [frames].""" + return int(np.ceil(60. * self.fps / self.min_bpm)) + + @property + def intervals(self): + """Beat intervals [frames].""" + return np.arange(self.min_interval, self.max_interval + 1) + + +class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): + """ + Create a tempo histogram with a bank of resonating comb filters. + + Parameters + ---------- + min_bpm : float, optional + Minimum tempo to detect [bpm]. + max_bpm : float, optional + Maximum tempo to detect [bpm]. + alpha : float, optional + Scaling factor for the comb filter. + fps : float, optional + Frames per second. 
+ + """ + + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, fps=None, + **kwargs): + # pylint: disable=unused-argument + super(CombFilterTempoHistogramProcessor, self).__init__( + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + self.alpha = alpha + + def process(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals with a bank of resonating + comb filters. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + histogram_bins : numpy array + Bins of the beat interval histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + return interval_histogram_comb(activations, self.alpha, + self.min_interval, self.max_interval) + + +class ACFTempoHistogramProcessor(TempoHistogramProcessor): + """ + Create a tempo histogram with autocorrelation. + + Parameters + ---------- + min_bpm : float, optional + Minimum tempo to detect [bpm]. + max_bpm : float, optional + Maximum tempo to detect [bpm]. + fps : float, optional + Frames per second. + + """ + + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, **kwargs): + # pylint: disable=unused-argument + super(ACFTempoHistogramProcessor, self).__init__( + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + + def process(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals with the autocorrelation + function. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + histogram_bins : numpy array + Bins of the beat interval histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # build the tempo (i.e. inter beat interval) histogram and return it + return interval_histogram_acf(activations, self.min_interval, + self.max_interval) + + +class DBNTempoHistogramProcessor(TempoHistogramProcessor): + """ + Create a tempo histogram with a dynamic Bayesian network (DBN). 
+ + Parameters + ---------- + min_bpm : float, optional + Minimum tempo to detect [bpm]. + max_bpm : float, optional + Maximum tempo to detect [bpm]. + fps : float, optional + Frames per second. + + """ + + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, **kwargs): + # pylint: disable=unused-argument + super(DBNTempoHistogramProcessor, self).__init__( + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + + def process(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals with a DBN. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + histogram_bins : numpy array + Bins of the beat interval histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # build the tempo (i.e. inter beat interval) histogram and return it + from .beats import DBNBeatTrackingProcessor + # instantiate a DBN for beat tracking + dbn = DBNBeatTrackingProcessor(min_bpm=self.min_bpm, + max_bpm=self.max_bpm, + num_tempi=None, + fps=self.fps) + # get the best state path by calling the viterbi algorithm + path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) + intervals = dbn.st.state_intervals[path] + # get the counts of the bins + bins = np.bincount(intervals, + minlength=dbn.st.intervals.max() + 1) + # truncate everything below the minimum interval of the state space + bins = bins[dbn.st.intervals.min():] + # build a histogram together with the intervals and return it + return bins, dbn.st.intervals + + class TempoEstimationProcessor(Processor): """ Tempo Estimation Processor class. @@ -255,6 +452,12 @@ class TempoEstimationProcessor(Processor): Scaling factor for the comb filter. fps : float, optional Frames per second. + histogram_processor : :class:`TempoHistogramProcessor`, optional + Processor used to create a tempo histogram. If 'None', a default + combfilter histogram processor will be created and used. 
+ kwargs : dict, optional + Keyword arguments passed to :class:`CombFilterTempoHistogramProcessor` + if no `histogram_processor` was given. Examples -------- @@ -278,36 +481,53 @@ class TempoEstimationProcessor(Processor): [ 82.19178, 0.09629]]) """ - # default values for tempo estimation - METHOD = 'comb' - MIN_BPM = 40. - MAX_BPM = 250. - HIST_SMOOTH = 9 - ACT_SMOOTH = 0.14 - ALPHA = 0.79 def __init__(self, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, alpha=ALPHA, - fps=None, **kwargs): + act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, fps=None, + histogram_processor=None, **kwargs): # pylint: disable=unused-argument - # save variables self.method = method - self.min_bpm = min_bpm - self.max_bpm = max_bpm self.act_smooth = act_smooth self.hist_smooth = hist_smooth - self.alpha = alpha self.fps = fps + if histogram_processor is None: + if method == 'acf': + histogram_processor = ACFTempoHistogramProcessor + elif method == 'comb': + histogram_processor = CombFilterTempoHistogramProcessor + elif method == 'dbn': + histogram_processor = DBNTempoHistogramProcessor + else: + raise ValueError('tempo histogram method unknown.') + # instantiate histogram processor + histogram_processor = histogram_processor( + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + self.histogram_processor = histogram_processor + + @property + def min_bpm(self): + """Minimum tempo [bpm].""" + return self.histogram_processor.min_bpm + + @property + def max_bpm(self): + """Maximum tempo [bpm].""" + return self.histogram_processor.max_bpm + + @property + def intervals(self): + """Beat intervals [frames].""" + return self.histogram_processor.intervals @property def min_interval(self): """Minimum beat interval [frames].""" - return int(np.floor(60. * self.fps / self.max_bpm)) + return self.histogram_processor.min_interval @property def max_interval(self): """Maximum beat interval [frames].""" - return int(np.ceil(60. 
* self.fps / self.min_bpm)) + return self.histogram_processor.max_interval def process(self, activations, **kwargs): """ @@ -335,9 +555,9 @@ def process(self, activations, **kwargs): # detect the tempi and return them return detect_tempo(histogram, self.fps) - def interval_histogram(self, activations): + def interval_histogram(self, activations, **kwargs): """ - Compute the histogram of the beat intervals with the selected method. + Compute the histogram of the beat intervals. Parameters ---------- @@ -352,31 +572,7 @@ def interval_histogram(self, activations): Corresponding delays [frames]. """ - # build the tempo (i.e. inter beat interval) histogram and return it - if self.method == 'acf': - return interval_histogram_acf(activations, self.min_interval, - self.max_interval) - elif self.method == 'comb': - return interval_histogram_comb(activations, self.alpha, - self.min_interval, - self.max_interval) - elif self.method == 'dbn': - from .beats import DBNBeatTrackingProcessor - # instantiate a DBN for beat tracking - dbn = DBNBeatTrackingProcessor(min_bpm=self.min_bpm, - max_bpm=self.max_bpm, - num_tempi=None, fps=self.fps) - # get the best state path by calling the viterbi algorithm - path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) - intervals = dbn.st.state_intervals[path] - # get the counts of the bins - bins = np.bincount(intervals, minlength=dbn.st.intervals.max() + 1) - # truncate everything below the minimum interval of the state space - bins = bins[dbn.st.intervals.min():] - # build a histogram together with the intervals and return it - return bins, dbn.st.intervals - else: - raise ValueError('tempo estimation method unknown') + return self.histogram_processor(activations, **kwargs) def dominant_interval(self, histogram): """ @@ -398,9 +594,9 @@ def dominant_interval(self, histogram): return dominant_interval(histogram, self.hist_smooth) @staticmethod - def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - 
act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, - alpha=ALPHA): + def add_arguments(parser, method=None, min_bpm=None, max_bpm=None, + act_smooth=None, hist_smooth=None, hist_buffer=None, + alpha=None): """ Add tempo estimation related arguments to an existing parser. @@ -418,6 +614,8 @@ def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, Smooth the activation function over `act_smooth` seconds. hist_smooth : int, optional Smooth the tempo histogram over `hist_smooth` bins. + hist_buffer : float, optional + Aggregate the tempo histogram over `hist_buffer` seconds. alpha : float, optional Scaling factor for the comb filter. @@ -455,6 +653,11 @@ def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, default=hist_smooth, help='smooth the tempo histogram over N bins ' '[default=%(default)d]') + if hist_buffer is not None: + g.add_argument('--hist_buffer', action='store', type=float, + default=hist_buffer, + help='aggregate the tempo histogram over N seconds ' + '[default=%(default).2f]') if alpha is not None: g.add_argument('--alpha', action='store', type=float, default=alpha, diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index 5b8565a46..c377d2c7d 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -93,11 +93,9 @@ def test_types(self): self.assertIsInstance(self.processor.max_bpm, float) self.assertIsInstance(self.processor.act_smooth, float) self.assertIsInstance(self.processor.hist_smooth, int) - self.assertIsInstance(self.processor.alpha, float) self.assertIsInstance(self.processor.fps, float) - # properties - self.assertIsInstance(self.processor.min_interval, int) - self.assertIsInstance(self.processor.max_interval, int) + self.assertIsInstance(self.processor.histogram_processor, + TempoHistogramProcessor) def test_values(self): self.assertTrue(self.processor.method == 'comb') @@ -105,10 +103,11 @@ def test_values(self): self.assertTrue(self.processor.max_bpm == 250) 
self.assertTrue(self.processor.act_smooth == 0.14) self.assertTrue(self.processor.hist_smooth == 9) - self.assertTrue(self.processor.alpha == 0.79) self.assertTrue(self.processor.fps == 100) - self.assertTrue(self.processor.min_interval == 24) - self.assertTrue(self.processor.max_interval == 150) + # test default values of the histogram processor + self.assertTrue(self.processor.histogram_processor.alpha == 0.79) + self.assertTrue(self.processor.histogram_processor.min_interval == 24) + self.assertTrue(self.processor.histogram_processor.max_interval == 150) def test_process(self): tempi = self.processor(act) From 1018adf276a0e5e39cd98cc139d6e59c36ed52fa Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Thu, 8 Jun 2017 16:45:26 +0200 Subject: [PATCH 03/18] TempoDetector can operate on live audio signals --- CHANGES.rst | 1 + bin/TempoDetector | 2 +- madmom/features/tempo.py | 105 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index c90e607bc..eeac280e1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,7 @@ Version 0.16.dev0 New features: +* `TempoDetector` can operate on live audio signals (#292) * Added chord evaluation (#309) Bug fixes: diff --git a/bin/TempoDetector b/bin/TempoDetector index 3de26d943..3c67fa2e2 100755 --- a/bin/TempoDetector +++ b/bin/TempoDetector @@ -50,7 +50,7 @@ def main(): # version p.add_argument('--version', action='version', version='TempoDetector.2016') # input/output options - io_arguments(p, output_suffix='.bpm.txt') + io_arguments(p, output_suffix='.bpm.txt', online=True) ActivationsProcessor.add_arguments(p) # signal processing arguments SignalProcessor.add_arguments(p, norm=False, gain=0) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 29733fc79..484d68f2f 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -10,6 +10,7 @@ from __future__ import absolute_import, division, print_function import numpy as 
np +import sys from madmom.processors import Processor from madmom.audio.signal import smooth as smooth_signal @@ -265,11 +266,12 @@ class TempoHistogramProcessor(Processor): """ - def __init__(self, min_bpm, max_bpm, fps=None, **kwargs): + def __init__(self, min_bpm, max_bpm, fps=None, online=False, **kwargs): # pylint: disable=unused-argument self.min_bpm = min_bpm self.max_bpm = max_bpm self.fps = fps + self.online = online @property def min_interval(self): @@ -286,6 +288,31 @@ def intervals(self): """Beat intervals [frames].""" return np.arange(self.min_interval, self.max_interval + 1) + def reset(self): + """Reset to initial state.""" + raise NotImplementedError('Must be implemented by subclass.') + + def process(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + tempi : numpy array + Array with the dominant tempi [bpm] (first column) and their + relative strengths (second column). + + """ + if self.online: + return self.process_online(activations, **kwargs) + else: + return self.process_offline(activations, **kwargs) + class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): """ @@ -311,7 +338,7 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, fps=None, min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) self.alpha = alpha - def process(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Compute the histogram of the beat intervals with a bank of resonating comb filters. @@ -353,7 +380,7 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, **kwargs): super(ACFTempoHistogramProcessor, self).__init__( min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) - def process(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Compute the histogram of the beat intervals with the autocorrelation function. 
@@ -396,7 +423,7 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, **kwargs): super(DBNTempoHistogramProcessor, self).__init__( min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) - def process(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Compute the histogram of the beat intervals with a DBN. @@ -484,12 +511,15 @@ class TempoEstimationProcessor(Processor): def __init__(self, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, fps=None, - histogram_processor=None, **kwargs): + online=False, histogram_processor=None, **kwargs): # pylint: disable=unused-argument self.method = method self.act_smooth = act_smooth self.hist_smooth = hist_smooth self.fps = fps + self.online = online + if self.online: + self.visualize = kwargs.get('verbose', False) if histogram_processor is None: if method == 'acf': histogram_processor = ACFTempoHistogramProcessor @@ -501,7 +531,8 @@ def __init__(self, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, raise ValueError('tempo histogram method unknown.') # instantiate histogram processor histogram_processor = histogram_processor( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, + **kwargs) self.histogram_processor = histogram_processor @property @@ -529,10 +560,35 @@ def max_interval(self): """Maximum beat interval [frames].""" return self.histogram_processor.max_interval + def reset(self): + """Reset to initial state.""" + self.histogram_processor.reset() + def process(self, activations, **kwargs): """ Detect the tempi from the (beat) activations. + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + tempi : numpy array + Array with the dominant tempi [bpm] (first column) and their + relative strengths (second column). 
+ + """ + if self.online: + return self.process_online(activations, **kwargs) + else: + return self.process_offline(activations, **kwargs) + + def process_offline(self, activations, **kwargs): + """ + Detect the tempi from the (beat) activations. + Parameters ---------- activations : numpy array @@ -555,6 +611,43 @@ def process(self, activations, **kwargs): # detect the tempi and return them return detect_tempo(histogram, self.fps) + def process_online(self, activations, reset=True, **kwargs): + """ + Detect the tempi from the (beat) activations in online mode. + + Parameters + ---------- + activations : numpy array + Beat activation function processed frame by frame. + reset : bool, optional + Reset the TempoEstimationProcessor to its initial state before + processing. + + Returns + ------- + tempi : numpy array + Array with the dominant tempi [bpm] (first column) and their + relative strengths (second column). + + """ + # multiple activations will result in multiple tempi + tempi = [] + # iterate over all activations + for activation in activations: + # build the tempo histogram depending on the chosen method + histogram = self.interval_histogram(activation, reset=reset) + # smooth the histogram + histogram = smooth_histogram(histogram, self.hist_smooth) + # detect the tempo and append it to the found tempi + tempo = detect_tempo(histogram, self.fps) + tempi.append(tempo) + # visualize tempo + if self.visualize: + sys.stderr.write('\r%s' % ''.join(str(tempo[:2, 0]))) + sys.stderr.flush() + # return last detected tempo + return tempi[-1] + def interval_histogram(self, activations, **kwargs): """ Compute the histogram of the beat intervals. 
From 7e16296b4200804c853d7f91f2da32bfbe64c23b Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Thu, 8 Jun 2017 17:45:00 +0200 Subject: [PATCH 04/18] add reset method to BufferProcessor --- madmom/processors.py | 22 ++++++++++++++++++++-- tests/test_processors.py | 9 ++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/madmom/processors.py b/madmom/processors.py index b387e5466..6a2317667 100644 --- a/madmom/processors.py +++ b/madmom/processors.py @@ -639,11 +639,16 @@ class BufferProcessor(Processor): ---------- buffer_size : int or tuple Size of the buffer (time steps, [additional dimensions]). + init : numpy array, optional + Init the buffer with this array. + init_value : float, optional + If only `buffer_size` is given but no `init`, use this value to + initialise the buffer. Notes ----- - If `buffer_size` (or the first value thereof) is 1, only the un-buffered - current value is returned. + If `buffer_size` (or the first item thereof in case of tuple) is 1, + only the un-buffered current value is returned. If context is needed, `buffer_size` must be set to >1. E.g. SpectrogramDifference needs a context of two frames to be able to @@ -664,8 +669,21 @@ def __init__(self, buffer_size=None, init=None, init_value=0): init = np.ones(buffer_size) * init_value # save variables self.buffer_size = buffer_size + self.init = init self.buffer = init + def reset(self, init=None): + """ + Reset BufferProcessor to its initial state. + + Parameters + ---------- + init : numpy array, shape (num_hiddens,), optional + Reset BufferProcessor to this initial state. + + """ + self.buffer = init if init is not None else self.init + def process(self, data, **kwargs): """ Buffer the data. 
diff --git a/tests/test_processors.py b/tests/test_processors.py index 9964249d3..540d4a3e7 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -8,7 +8,6 @@ from __future__ import absolute_import, division, print_function import tempfile import unittest -import sys from madmom.processors import * from madmom.models import * @@ -71,6 +70,14 @@ def test_2d(self): self.assertTrue(result.shape == (5, 2)) self.assertTrue(np.allclose(result.ravel(), np.arange(4, 14))) + def test_reset(self): + buffer = BufferProcessor(5, init=np.ones(5)) + self.assertTrue(np.allclose(buffer.buffer, 1)) + result = buffer(np.arange(2)) + self.assertTrue(np.allclose(result, [1, 1, 1, 0, 1])) + buffer.reset() + self.assertTrue(np.allclose(buffer.buffer, 1)) + # clean up def teardown(): From f6c779139bdac301c3eb08760a045eb92f52ca54 Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Thu, 8 Jun 2017 18:10:30 +0200 Subject: [PATCH 05/18] online CombFilterTempoEstimator --- madmom/features/tempo.py | 67 ++++++++++++++++++++++++++++++++-- tests/test_bin.py | 10 +++++ tests/test_features_tempo.py | 71 ++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 4 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 484d68f2f..fba07de4f 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -12,7 +12,7 @@ import numpy as np import sys -from madmom.processors import Processor +from madmom.processors import Processor, BufferProcessor from madmom.audio.signal import smooth as smooth_signal METHOD = 'comb' @@ -21,6 +21,7 @@ MAX_BPM = 250. ACT_SMOOTH = 0.14 HIST_SMOOTH = 9 +HIST_BUFFER = 10. NO_TEMPO = np.nan @@ -326,17 +327,31 @@ class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): Maximum tempo to detect [bpm]. alpha : float, optional Scaling factor for the comb filter. + hist_buffer : float, optional + Use a buffer of this size to sum the max. bins in online mode + [seconds]. 
fps : float, optional Frames per second. + online : bool, optional + Operate in online (i.e. causal) mode. """ - def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, fps=None, - **kwargs): + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, + hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(CombFilterTempoHistogramProcessor, self).__init__( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) self.alpha = alpha + if self.online: + self.combfilter_matrix = [] + self.buffer = BufferProcessor((int(hist_buffer * self.fps), + len(self.intervals))) + + def reset(self): + """Reset to initial state.""" + self.combfilter_matrix = [] + self.buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -359,6 +374,50 @@ def process_offline(self, activations, **kwargs): return interval_histogram_comb(activations, self.alpha, self.min_interval, self.max_interval) + def process_online(self, activations, reset=True, **kwargs): + """ + Compute the histogram of the beat intervals with a bank of resonating + comb filters in online mode. + + Parameters + ---------- + activations : numpy float + Beat activation function. + reset : bool, optional + Reset to initial state before processing. + + Returns + ------- + histogram_bins : numpy array + Bins of the tempo histogram. + histogram_delays : numpy array + Corresponding delays [frames]. 
+ + """ + # reset to initial state + if reset: + self.reset() + # expand the activation for every tau + activations = np.full(len(self.intervals), activations, dtype=np.float) + # append it to the comb filter matrix + self.combfilter_matrix.append(activations) + # online feed backward comb filter + min_tau = self.min_interval + for i in self.intervals: + if len(self.combfilter_matrix) > i: + self.combfilter_matrix[-1][i - min_tau] += self.alpha * \ + self.combfilter_matrix[-1 - i][i - min_tau] + # retrieve maxima + act_max = self.combfilter_matrix[-1] == \ + np.max(self.combfilter_matrix[-1], axis=-1) + # compute the max bins + bins = self.combfilter_matrix[-1] * act_max + # use a buffer to only keep bins of the last seconds + # shift buffer and put new bins at end of buffer + bins = self.buffer(bins) + # build a histogram together with the intervals and return it + return np.sum(bins, axis=0), self.intervals + class ACFTempoHistogramProcessor(TempoHistogramProcessor): """ diff --git a/tests/test_bin.py b/tests/test_bin.py index f0d01f4ae..3766ab2f4 100644 --- a/tests/test_bin.py +++ b/tests/test_bin.py @@ -868,6 +868,7 @@ def setUp(self): pj(ACTIVATIONS_PATH, "sample.beats_blstm.npz")) self.result = np.loadtxt( pj(DETECTIONS_PATH, "sample.tempo_detector.txt")) + self.online_results = np.array([176.47, 88.24, 0.58]) def test_help(self): self.assertTrue(run_help(self.bin)) @@ -901,6 +902,15 @@ def test_run(self): result = np.loadtxt(tmp_result) self.assertTrue(np.allclose(result, self.result, atol=1e-5)) + def test_online(self): + run_program([self.bin, 'online', sample_file, '-o', tmp_result]) + result = np.loadtxt(tmp_result) + self.assertTrue(np.allclose(result[-1], self.online_results)) + run_program([self.bin, 'single', '--online', sample_file, '-o', + tmp_result]) + result = np.loadtxt(tmp_result) + self.assertTrue(np.allclose(result, self.online_results)) + # clean up def teardown(): diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py 
index c377d2c7d..ba3706980 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -19,6 +19,11 @@ COMB_TEMPI = np.array([[176.470, 0.475], [117.647, 0.177], [240.0, 0.154], [68.966, 0.099], [82.192, 0.096]]) +COMB_TEMPI_ONLINE = [[176.470588, 0.289414003], [115.384615, 0.124638601], + [230.769231, 0.0918372569], [84.5070423, 0.0903815502], + [75.0000000, 0.0713704506], [53.5714286, 0.0701783497], + [65.9340659, 0.0696296514], [49.1803279, 0.0676349815], + [61.2244898, 0.0646209647], [40.8163265, 0.0602941909]] HIST = interval_histogram_comb(act, 0.79, min_tau=24, max_tau=150) @@ -113,6 +118,72 @@ def test_process(self): tempi = self.processor(act) self.assertTrue(np.allclose(tempi, COMB_TEMPI, atol=0.01)) + def test_process_online(self): + processor = TempoEstimationProcessor(fps=fps, online=True) + tempi = [processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) + # with resetting results are the same + processor.reset() + tempi = [processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) + # without resetting results are different + tempi = [processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], [[176.470588, 0.31322337], + [85.7142857, 0.11437361], + [115.384615, 0.10919612]])) + + +class TestCombFilterTempoHistogramProcessorClass(unittest.TestCase): + + def setUp(self): + self.processor = CombFilterTempoHistogramProcessor(fps=fps) + self.online_processor = CombFilterTempoHistogramProcessor(fps=fps, + online=True) + + def test_types(self): + self.assertIsInstance(self.processor.min_bpm, float) + self.assertIsInstance(self.processor.max_bpm, float) + self.assertIsInstance(self.processor.alpha, float) + self.assertIsInstance(self.processor.fps, float) + # properties + self.assertIsInstance(self.processor.min_interval, int) + 
self.assertIsInstance(self.processor.max_interval, int) + + def test_values(self): + self.assertTrue(self.processor.min_bpm == 40) + self.assertTrue(self.processor.max_bpm == 250) + self.assertTrue(self.processor.alpha == 0.79) + self.assertTrue(self.processor.fps == 100) + self.assertTrue(self.processor.min_interval == 24) + self.assertTrue(self.processor.max_interval == 150) + + def test_tempo(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.processor, fps=fps) + tempi = tempo_processor(act) + self.assertTrue(np.allclose(tempi, COMB_TEMPI, atol=0.01)) + + def test_tempo_online(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.online_processor, fps=fps, online=True) + tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) + # with resetting results are the same + tempo_processor.reset() + tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) + # without resetting results are different + tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], [[176.470588, 0.31322337], + [85.7142857, 0.11437361], + [115.384615, 0.10919612]])) + class TestWriteTempoFunction(unittest.TestCase): From 8d69f6ea2829c4b76fd50c88a3cb1c6ea882e89c Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Thu, 8 Jun 2017 18:27:23 +0200 Subject: [PATCH 06/18] online ACFTempoEstimator --- madmom/features/tempo.py | 45 +++++++++++++++++++++++++++-- tests/test_features_tempo.py | 55 +++++++++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index fba07de4f..2eaa3e3d7 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -429,15 +429,27 @@ class 
ACFTempoHistogramProcessor(TempoHistogramProcessor): Minimum tempo to detect [bpm]. max_bpm : float, optional Maximum tempo to detect [bpm]. + buffer_size : float, optional + Use a buffer of this size for the activations to calculate the + auto-correlation function [seconds]. fps : float, optional Frames per second. + online : bool, optional + Operate in online (i.e. causal) mode. """ - def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, **kwargs): + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, + buffer_size=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(ACFTempoHistogramProcessor, self).__init__( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) + if self.online: + self.buffer = BufferProcessor(int(buffer_size * self.fps)) + + def reset(self): + """Reset to initial state.""" + self.buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -461,6 +473,35 @@ def process_offline(self, activations, **kwargs): return interval_histogram_acf(activations, self.min_interval, self.max_interval) + def process_online(self, activations, reset=True, **kwargs): + """ + Compute the histogram of the beat intervals with the autocorrelation + function in online mode. + + Parameters + ---------- + activations : numpy float + Beat activation function. + reset : bool, optional + Reset to initial state before processing. + + Returns + ------- + histogram_bins : numpy array + Bins of the tempo histogram. + histogram_delays : numpy array + Corresponding delays [frames]. 
+ + """ + # reset to initial state + if reset: + self.reset() + # shift buffer and put new activations at end of buffer + activations = self.buffer(activations) + # use offline acf function on buffered activations + return interval_histogram_acf(activations, self.min_interval, + self.max_interval) + class DBNTempoHistogramProcessor(TempoHistogramProcessor): """ diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index ba3706980..bc5a7bbec 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -24,7 +24,13 @@ [75.0000000, 0.0713704506], [53.5714286, 0.0701783497], [65.9340659, 0.0696296514], [49.1803279, 0.0676349815], [61.2244898, 0.0646209647], [40.8163265, 0.0602941909]] - +ACF_TEMPI = np.array([[176.470, 0.246], [86.956, 0.226], [58.823, 0.181], + [43.795, 0.137], [115.384, 0.081], [70.588, 0.067], + [50.847, 0.058]]) +ACF_TEMPI_ONLINE = [[176.470588, 0.253116038], [88.2352941, 0.231203195], + [58.8235294, 0.187827698], [43.7956204, 0.139373027], + [115.384615, 0.0749783568], [69.7674419, 0.0599632291], + [50.4201681, 0.0535384559]] HIST = interval_histogram_comb(act, 0.79, min_tau=24, max_tau=150) @@ -185,6 +191,53 @@ def test_tempo_online(self): [115.384615, 0.10919612]])) +class TestACFTempoHistogramProcessorClass(unittest.TestCase): + + def setUp(self): + self.processor = ACFTempoHistogramProcessor(fps=fps) + self.online_processor = ACFTempoHistogramProcessor(fps=fps, + online=True) + + def test_types(self): + self.assertIsInstance(self.processor.min_bpm, float) + self.assertIsInstance(self.processor.max_bpm, float) + self.assertIsInstance(self.processor.fps, float) + # properties + self.assertIsInstance(self.processor.min_interval, int) + self.assertIsInstance(self.processor.max_interval, int) + + def test_values(self): + self.assertTrue(self.processor.min_bpm == 40) + self.assertTrue(self.processor.max_bpm == 250) + self.assertTrue(self.processor.fps == 100) + self.assertTrue(self.processor.min_interval == 24) 
+ self.assertTrue(self.processor.max_interval == 150) + + def test_tempo(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.processor, fps=fps) + tempi = tempo_processor(act) + self.assertTrue(np.allclose(tempi, ACF_TEMPI, atol=0.01)) + + def test_tempo_online(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.online_processor, fps=fps, online=True) + tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], ACF_TEMPI_ONLINE)) + # with resetting results are the same + tempo_processor.reset() + tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], ACF_TEMPI_ONLINE)) + # without resetting results are different + tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], [[176.4705882, 0.2414368], + [86.95652174, 0.2248635], + [58.25242718, 0.1878183]])) + + class TestWriteTempoFunction(unittest.TestCase): def setUp(self): From 5c66796c6978edb7744813b30d06f078453c850e Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Thu, 8 Jun 2017 18:39:09 +0200 Subject: [PATCH 07/18] online DBNTempoEstimator --- madmom/features/tempo.py | 68 ++++++++++++++++++++++++++++-------- tests/test_features_tempo.py | 41 ++++++++++++++++++++++ 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 2eaa3e3d7..73cfb82a9 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -515,13 +515,24 @@ class DBNTempoHistogramProcessor(TempoHistogramProcessor): Maximum tempo to detect [bpm]. fps : float, optional Frames per second. + online : bool, optional + Operate in online (i.e. causal) mode. 
""" - def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, **kwargs): + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, + online=False, **kwargs): # pylint: disable=unused-argument super(DBNTempoHistogramProcessor, self).__init__( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) + from .beats import DBNBeatTrackingProcessor + self.dbn = DBNBeatTrackingProcessor( + min_bpm=self.min_bpm, max_bpm=self.max_bpm, fps=self.fps, + online=online, **kwargs) + + def reset(self): + """Reset DBN to initial state.""" + self.dbn.hmm.reset() def process_offline(self, activations, **kwargs): """ @@ -540,23 +551,52 @@ def process_offline(self, activations, **kwargs): Corresponding delays [frames]. """ - # build the tempo (i.e. inter beat interval) histogram and return it - from .beats import DBNBeatTrackingProcessor - # instantiate a DBN for beat tracking - dbn = DBNBeatTrackingProcessor(min_bpm=self.min_bpm, - max_bpm=self.max_bpm, - num_tempi=None, - fps=self.fps) # get the best state path by calling the viterbi algorithm - path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) - intervals = dbn.st.state_intervals[path] + path, _ = self.dbn.hmm.viterbi(activations.astype(np.float32)) + intervals = self.dbn.st.state_intervals[path] + # get the counts of the bins + bins = np.bincount(intervals, + minlength=self.dbn.st.intervals.max() + 1) + # truncate everything below the minimum interval of the state space + bins = bins[self.dbn.st.intervals.min():] + # build a histogram together with the intervals and return it + return bins, self.dbn.st.intervals + + def process_online(self, activations, reset=True, **kwargs): + """ + Compute the histogram of the beat intervals with a DBN using the + forward algorithm. + + Parameters + ---------- + activations : numpy float + Beat activation function. + reset : bool, optional + Reset DBN to initial state before processing. 
+ + Returns + ------- + histogram_bins : numpy array + Bins of the tempo histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # reset to initial state + if reset: + self.reset() + # use forward path to get best state + fwd = self.dbn.hmm.forward(activations, reset=reset) + # choose the best state for each step + states = np.argmax(fwd, axis=1) + intervals = self.dbn.st.state_intervals[states] # get the counts of the bins bins = np.bincount(intervals, - minlength=dbn.st.intervals.max() + 1) + minlength=self.dbn.st.intervals.max() + 1) # truncate everything below the minimum interval of the state space - bins = bins[dbn.st.intervals.min():] + bins = bins[self.dbn.st.intervals.min():] # build a histogram together with the intervals and return it - return bins, dbn.st.intervals + return bins, self.dbn.st.intervals class TempoEstimationProcessor(Processor): diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index bc5a7bbec..ad26a588b 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -31,6 +31,7 @@ [58.8235294, 0.187827698], [43.7956204, 0.139373027], [115.384615, 0.0749783568], [69.7674419, 0.0599632291], [50.4201681, 0.0535384559]] +DBN_TEMPI = np.array([[176.470, 1]]) HIST = interval_histogram_comb(act, 0.79, min_tau=24, max_tau=150) @@ -238,6 +239,46 @@ def test_tempo_online(self): [58.25242718, 0.1878183]])) +class TestDBNTempoHistogramProcessorClass(unittest.TestCase): + + def setUp(self): + self.processor = DBNTempoHistogramProcessor(fps=fps) + self.online_processor = DBNTempoHistogramProcessor(fps=fps, + online=True) + + def test_types(self): + self.assertIsInstance(self.processor.min_bpm, float) + self.assertIsInstance(self.processor.max_bpm, float) + self.assertIsInstance(self.processor.fps, float) + # properties + self.assertIsInstance(self.processor.min_interval, int) + self.assertIsInstance(self.processor.max_interval, int) + + def test_values(self): + 
self.assertTrue(self.processor.min_bpm == 40) + self.assertTrue(self.processor.max_bpm == 250) + self.assertTrue(self.processor.fps == 100) + self.assertTrue(self.processor.min_interval == 24) + self.assertTrue(self.processor.max_interval == 150) + + def test_tempo(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.processor, fps=fps) + tempi = tempo_processor(act) + self.assertTrue(np.allclose(tempi, DBN_TEMPI, atol=0.01)) + + def test_tempo_online(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.online_processor, fps=fps, online=True) + # TODO: fix requirement for atleast_2d + tempi = [tempo_processor(np.atleast_2d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI)) + # with resetting results are the same + tempo_processor.reset() + tempi = [tempo_processor(np.atleast_2d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI)) + + class TestWriteTempoFunction(unittest.TestCase): def setUp(self): From 38cac994b5aabfd6b5ec0e1f879d7c884d511b1e Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Fri, 9 Jun 2017 00:55:40 +0200 Subject: [PATCH 08/18] improved online verbose displaying of tempo --- madmom/features/tempo.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 73cfb82a9..33290e14a 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -783,7 +783,17 @@ def process_online(self, activations, reset=True, **kwargs): tempi.append(tempo) # visualize tempo if self.visualize: - sys.stderr.write('\r%s' % ''.join(str(tempo[:2, 0]))) + display = '' + # display the 3 most likely tempi and their strengths + for i, display_tempo in enumerate(tempo[:3], start=1): + # display tempo + display += '| ' + str(round(display_tempo[0], 1)) + ' ' + # display strength + display += min(int(display_tempo[1] * 50), 18) * '*' + # fill up the rest with spaces + display 
= display.ljust(i * 26) + # print the tempi + sys.stderr.write('\r%s' % ''.join(display) + '|') sys.stderr.flush() # return last detected tempo return tempi[-1] From 11593adc4d238d62a3a488470cddfdfd854d4d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Wed, 30 Aug 2017 15:52:41 +0200 Subject: [PATCH 09/18] refactor process online/offline dispatching to OnlineProcessor --- madmom/features/beats.py | 40 +++++--------- madmom/features/onsets.py | 43 ++++----------- madmom/features/tempo.py | 67 ++++------------------ madmom/processors.py | 113 +++++++++++++++++++++++++++++++++++--- 4 files changed, 140 insertions(+), 123 deletions(-) diff --git a/madmom/features/beats.py b/madmom/features/beats.py index bba8bf198..f4460f381 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -10,11 +10,13 @@ from __future__ import absolute_import, division, print_function import sys + import numpy as np -from madmom.processors import Processor, SequentialProcessor, ParallelProcessor -from madmom.audio.signal import smooth as smooth_signal -from madmom.ml.nn import average_predictions +from ..audio.signal import smooth as smooth_signal +from ..ml.nn import average_predictions +from ..processors import (OnlineProcessor, ParallelProcessor, Processor, + SequentialProcessor, ) # classes for tracking (down-)beats with RNNs @@ -883,7 +885,7 @@ def _process_dbn(process_tuple): return process_tuple[0].viterbi(process_tuple[1]) -class DBNBeatTrackingProcessor(Processor): +class DBNBeatTrackingProcessor(OnlineProcessor): """ Beat tracking with RNNs and a dynamic Bayesian network (DBN) approximated by a Hidden Markov Model (HMM). @@ -1006,27 +1008,7 @@ def reset(self): self.last_beat = 0 self.tempo = 0 - def process(self, activations, **kwargs): - """ - Detect the beats in the given activation function. - - Parameters - ---------- - activations : numpy array - Beat activation function. 
- - Returns - ------- - beats : numpy array - Detected beat positions [seconds]. - - """ - if self.online: - return self.process_forward(activations, **kwargs) - else: - return self.process_viterbi(activations, **kwargs) - - def process_viterbi(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Detect the beats in the given activation function with Viterbi decoding. @@ -1091,7 +1073,7 @@ def process_viterbi(self, activations, **kwargs): # convert the detected beats to seconds and return them return (beats + first) / float(self.fps) - def process_forward(self, activations, reset=True, **kwargs): + def process_online(self, activations, reset=True, **kwargs): """ Detect the beats in the given activation function with the forward algorithm. @@ -1148,7 +1130,7 @@ def process_forward(self, activations, reset=True, **kwargs): sys.stderr.write('\r%s' % ''.join(display)) sys.stderr.flush() # forward path often reports multiple beats close together, thus report - # only beats more than the minumum interval apart + # only beats more than the minimum interval apart beats_ = [] for frame in np.nonzero(beats)[0]: cur_beat = (frame + self.counter) / float(self.fps) @@ -1167,6 +1149,10 @@ def process_forward(self, activations, reset=True, **kwargs): # return beat(s) return np.array(beats_) + process_forward = process_online + + process_viterbi = process_offline + @staticmethod def add_arguments(parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM, num_tempi=NUM_TEMPI, transition_lambda=TRANSITION_LAMBDA, diff --git a/madmom/features/onsets.py b/madmom/features/onsets.py index 970fe8997..00f258d13 100755 --- a/madmom/features/onsets.py +++ b/madmom/features/onsets.py @@ -13,9 +13,9 @@ from scipy.ndimage import uniform_filter from scipy.ndimage.filters import maximum_filter -from ..processors import (Processor, SequentialProcessor, ParallelProcessor, - BufferProcessor) from ..audio.signal import smooth as smooth_signal +from ..processors import 
(BufferProcessor, OnlineProcessor, ParallelProcessor, + Processor, SequentialProcessor, ) from ..utils import combine_events EPSILON = np.spacing(1) @@ -1018,7 +1018,7 @@ def add_arguments(parser, **kwargs): return OnsetPeakPickingProcessor.add_arguments(parser, **kwargs) -class OnsetPeakPickingProcessor(Processor): +class OnsetPeakPickingProcessor(OnlineProcessor): """ This class implements the onset peak-picking functionality. It transparently converts the chosen values from seconds to frames. @@ -1100,10 +1100,9 @@ def __init__(self, threshold=THRESHOLD, smooth=SMOOTH, pre_avg=PRE_AVG, combine=COMBINE, delay=DELAY, online=ONLINE, fps=FPS, **kwargs): # pylint: disable=unused-argument - # TODO: make this an IOProcessor by defining input/output processings - # super(PeakPicking, self).__init__(peak_picking, write_events) - # adjust some params for online mode? - if online: + # instantiate OnlineProcessor + super(OnsetPeakPickingProcessor, self).__init__(online=online) + if self.online: # set some parameters to 0 (i.e. no future information available) smooth = 0 post_avg = 0 @@ -1121,7 +1120,6 @@ def __init__(self, threshold=THRESHOLD, smooth=SMOOTH, pre_avg=PRE_AVG, self.post_max = post_max self.combine = combine self.delay = delay - self.online = online self.fps = fps def reset(self): @@ -1130,27 +1128,7 @@ def reset(self): self.counter = 0 self.last_onset = None - def process(self, activations, **kwargs): - """ - Detect the onsets in the given activation function. - - Parameters - ---------- - activations : numpy array - Onset activation function. - - Returns - ------- - onsets : numpy array - Detected onsets [seconds]. - - """ - if self.online: - return self.process_online(activations, **kwargs) - else: - return self.process_sequence(activations, **kwargs) - - def process_sequence(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Detect the onsets in the given activation function. 
@@ -1245,6 +1223,8 @@ def process_online(self, activations, reset=True, **kwargs): # return the onsets return onsets + process_sequence = process_offline + @staticmethod def add_arguments(parser, threshold=THRESHOLD, smooth=None, pre_avg=None, post_avg=None, pre_max=None, post_max=None, @@ -1300,8 +1280,9 @@ def add_arguments(parser, threshold=THRESHOLD, smooth=None, pre_avg=None, '[default=%(default).2f]') if post_avg is not None: g.add_argument('--post_avg', action='store', type=float, - default=post_avg, help='build average over N ' - 'following seconds [default=%(default).2f]') + default=post_avg, + help='build average over N following seconds ' + '[default=%(default).2f]') if pre_max is not None: g.add_argument('--pre_max', action='store', type=float, default=pre_max, diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 33290e14a..301995882 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -9,11 +9,12 @@ from __future__ import absolute_import, division, print_function -import numpy as np import sys -from madmom.processors import Processor, BufferProcessor -from madmom.audio.signal import smooth as smooth_signal +import numpy as np + +from ..audio.signal import smooth as smooth_signal +from ..processors import BufferProcessor, OnlineProcessor METHOD = 'comb' ALPHA = 0.79 @@ -243,7 +244,7 @@ def detect_tempo(histogram, fps): # tempo histogram processor classes -class TempoHistogramProcessor(Processor): +class TempoHistogramProcessor(OnlineProcessor): """ Tempo Histogram Processor class. 
@@ -269,10 +270,10 @@ class TempoHistogramProcessor(Processor): def __init__(self, min_bpm, max_bpm, fps=None, online=False, **kwargs): # pylint: disable=unused-argument + super(TempoHistogramProcessor, self).__init__(online=online) self.min_bpm = min_bpm self.max_bpm = max_bpm self.fps = fps - self.online = online @property def min_interval(self): @@ -289,31 +290,6 @@ def intervals(self): """Beat intervals [frames].""" return np.arange(self.min_interval, self.max_interval + 1) - def reset(self): - """Reset to initial state.""" - raise NotImplementedError('Must be implemented by subclass.') - - def process(self, activations, **kwargs): - """ - Compute the histogram of the beat intervals. - - Parameters - ---------- - activations : numpy array - Beat activation function. - - Returns - ------- - tempi : numpy array - Array with the dominant tempi [bpm] (first column) and their - relative strengths (second column). - - """ - if self.online: - return self.process_online(activations, **kwargs) - else: - return self.process_offline(activations, **kwargs) - class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): """ @@ -429,7 +405,7 @@ class ACFTempoHistogramProcessor(TempoHistogramProcessor): Minimum tempo to detect [bpm]. max_bpm : float, optional Maximum tempo to detect [bpm]. - buffer_size : float, optional + hist_buffer : float, optional Use a buffer of this size for the activations to calculate the auto-correlation function [seconds]. 
fps : float, optional @@ -440,12 +416,12 @@ class ACFTempoHistogramProcessor(TempoHistogramProcessor): """ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - buffer_size=HIST_BUFFER, fps=None, online=False, **kwargs): + hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(ACFTempoHistogramProcessor, self).__init__( min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) if self.online: - self.buffer = BufferProcessor(int(buffer_size * self.fps)) + self.buffer = BufferProcessor(int(hist_buffer * self.fps)) def reset(self): """Reset to initial state.""" @@ -599,7 +575,7 @@ def process_online(self, activations, reset=True, **kwargs): return bins, self.dbn.st.intervals -class TempoEstimationProcessor(Processor): +class TempoEstimationProcessor(OnlineProcessor): """ Tempo Estimation Processor class. @@ -653,11 +629,11 @@ def __init__(self, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, fps=None, online=False, histogram_processor=None, **kwargs): # pylint: disable=unused-argument + super(TempoEstimationProcessor, self).__init__(online=online) self.method = method self.act_smooth = act_smooth self.hist_smooth = hist_smooth self.fps = fps - self.online = online if self.online: self.visualize = kwargs.get('verbose', False) if histogram_processor is None: @@ -704,27 +680,6 @@ def reset(self): """Reset to initial state.""" self.histogram_processor.reset() - def process(self, activations, **kwargs): - """ - Detect the tempi from the (beat) activations. - - Parameters - ---------- - activations : numpy array - Beat activation function. - - Returns - ------- - tempi : numpy array - Array with the dominant tempi [bpm] (first column) and their - relative strengths (second column). 
- - """ - if self.online: - return self.process_online(activations, **kwargs) - else: - return self.process_offline(activations, **kwargs) - def process_offline(self, activations, **kwargs): """ Detect the tempi from the (beat) activations. diff --git a/madmom/processors.py b/madmom/processors.py index 6a2317667..6e2e07b59 100644 --- a/madmom/processors.py +++ b/madmom/processors.py @@ -15,16 +15,15 @@ from __future__ import absolute_import, division, print_function -import os -import sys import argparse import itertools as it import multiprocessing as mp +import os +import sys +from collections import MutableSequence import numpy as np -from collections import MutableSequence - class Processor(object): """ @@ -121,13 +120,110 @@ def process(self, data, **kwargs): Processed data. """ - raise NotImplementedError('must be implemented by subclass.') + raise NotImplementedError('Must be implemented by subclass.') def __call__(self, *args, **kwargs): # this magic method makes a Processor callable return self.process(*args, **kwargs) +class OnlineProcessor(Processor): + """ + Abstract base class for processing data in online mode. + + Derived classes must implement the following methods: + + - process_online(): process the data in online mode, + - process_offline(): process the data in offline mode. + + """ + + def __init__(self, online=False): + self.online = online + + def process(self, data, **kwargs): + """ + Process the data either in online or offline mode. + + Parameters + ---------- + data : depends on the implementation of subclass + Data to be processed. + kwargs : dict, optional + Keyword arguments for processing. + + Returns + ------- + depends on the implementation of subclass + Processed data. + + Notes + ----- + This method is used to pass the data to either `process_online` or + `process_offline`, depending on the `online` setting of the processor. 
+ + """ + if self.online: + return self.process_online(data, **kwargs) + return self.process_offline(data, **kwargs) + + def process_online(self, data, reset=True, **kwargs): + """ + Process the data in online mode. + + This method must be implemented by the derived class and should process + the given data frame by frame and return the processed output. + + Parameters + ---------- + data : depends on the implementation of subclass + Data to be processed. + reset : bool, optional + Reset the processor to its initial state before processing. + kwargs : dict, optional + Keyword arguments for processing. + + Returns + ------- + depends on the implementation of subclass + Processed data. + + """ + raise NotImplementedError('Must be implemented by subclass.') + + def process_offline(self, data, **kwargs): + """ + Process the data in offline mode. + + This method must be implemented by the derived class and should process + the given data and return the processed output. + + Parameters + ---------- + data : depends on the implementation of subclass + Data to be processed. + kwargs : dict, optional + Keyword arguments for processing. + + Returns + ------- + depends on the implementation of subclass + Processed data. + + """ + raise NotImplementedError('Must be implemented by subclass.') + + def reset(self): + """ + Reset the OnlineProcessor. + + This method must be implemented by the derived class and should reset + the processor to its initial state. + + """ + raise NotImplementedError('Must be implemented by subclass.') + + class OutputProcessor(Processor): """ Class for processing data and/or feeding it into some sort of output. 
@@ -157,7 +253,7 @@ def process(self, data, output, **kwargs): """ # pylint: disable=arguments-differ - raise NotImplementedError('must be implemented by subclass.') + raise NotImplementedError('Must be implemented by subclass.') # functions for processing file(s) with a Processor @@ -198,9 +294,8 @@ def _process(process_tuple): elif isinstance(process_tuple[0], Processor): # call the Processor with data and kwargs return process_tuple[0](*process_tuple[1:-1], **process_tuple[-1]) - else: - # just call whatever we got here (e.g. a function) without kwargs - return process_tuple[0](*process_tuple[1:-1]) + # just call whatever we got here (e.g. a function) without kwargs + return process_tuple[0](*process_tuple[1:-1]) class SequentialProcessor(MutableSequence, Processor): From 8862e17be749270bacafdc6b98e04dc4291c1b73 Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Mon, 26 Jun 2017 20:40:49 +0200 Subject: [PATCH 10/18] real online computation of autocorrelation tempo histogram --- madmom/features/tempo.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 301995882..d8c657560 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -421,10 +421,12 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, super(ACFTempoHistogramProcessor, self).__init__( min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) if self.online: + self.bins = np.zeros(len(self.intervals)) self.buffer = BufferProcessor(int(hist_buffer * self.fps)) def reset(self): """Reset to initial state.""" + self.bins = np.zeros(len(self.intervals)) self.buffer.reset() def process_offline(self, activations, **kwargs): @@ -472,11 +474,19 @@ def process_online(self, activations, reset=True, **kwargs): # reset to initial state if reset: self.reset() - # shift buffer and put new activations at end of buffer - activations = self.buffer(activations) - # use offline acf function on 
buffered activations - return interval_histogram_acf(activations, self.min_interval, - self.max_interval) + # select relevant activations from buffer for subtraction + buf = self.buffer.buffer[self.min_interval:self.max_interval + 1] + # subtract oldest acf values before activations are removed from buffer + # as long as the buffer is not filled this will subtract 0 + self.bins -= buf * self.buffer.buffer[0] + # shift buffer and put new activation at end of buffer + buf = self.buffer(activations) + # select relevant activations from buffer for addition + buf = buf[-self.max_interval - 1:-self.min_interval] + # add new acf values to bins + self.bins += np.flipud(buf * activations) + # return histogram + return np.array(self.bins), self.intervals class DBNTempoHistogramProcessor(TempoHistogramProcessor): From 1a9824e189f7aa664e426a0e5d00e595af33deb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Sat, 22 Jul 2017 16:14:44 +0200 Subject: [PATCH 11/18] add tests to histogram classes --- madmom/features/tempo.py | 4 ++ tests/test_features_tempo.py | 89 ++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index d8c657560..44a422a21 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -373,6 +373,8 @@ def process_online(self, activations, reset=True, **kwargs): # reset to initial state if reset: self.reset() + if activations.size != 1: + raise NotImplementedError('can only be called frame by frame') # expand the activation for every tau activations = np.full(len(self.intervals), activations, dtype=np.float) # append it to the comb filter matrix @@ -474,6 +476,8 @@ def process_online(self, activations, reset=True, **kwargs): # reset to initial state if reset: self.reset() + if activations.size != 1: + raise NotImplementedError('can only be called frame by frame') # select relevant activations from buffer for subtraction buf = 
self.buffer.buffer[self.min_interval:self.max_interval + 1] # subtract oldest acf values before activations are removed from buffer diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index ad26a588b..77112ba81 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -191,6 +191,39 @@ def test_tempo_online(self): [85.7142857, 0.11437361], [115.384615, 0.10919612]])) + def test_process(self): + hist, delays = self.processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 10.5064280455)) + self.assertTrue(np.allclose(hist.min(), 1.23250838113)) + self.assertTrue(np.allclose(hist.argmax(), 10)) + self.assertTrue(np.allclose(hist.argmin(), 44)) + self.assertTrue(np.allclose(np.sum(hist), 231.568316445)) + self.assertTrue(np.allclose(np.mean(hist), 1.82337257043)) + self.assertTrue(np.allclose(np.median(hist), 1.48112542203)) + + def test_process_online(self): + with self.assertRaises(NotImplementedError): + self.online_processor(act) + result = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + # the final result must be the same as for offline processing + hist, delays = result[-1] + hist_, delays_ = self.processor(act) + self.assertTrue(np.allclose(hist, hist_)) + self.assertTrue(np.allclose(delays, delays_)) + # result after 100 frames + hist, delays = result[99] + print(hist.max(), hist.min(), hist.argmax(), hist.argmin()) + self.assertTrue(np.allclose(hist.max(), 2.03108930086)) + self.assertTrue(np.allclose(hist.min(), 1.23250838113)) + self.assertTrue(np.allclose(hist.argmax(), 12)) + self.assertTrue(np.allclose(hist.argmin(), 44)) + print(np.sum(hist), np.mean(hist), np.median(hist)) + self.assertTrue(np.allclose(np.sum(hist), 175.034206851)) + self.assertTrue(np.allclose(np.mean(hist), 1.37822210119)) + self.assertTrue(np.allclose(np.median(hist), 1.23250838113)) + class TestACFTempoHistogramProcessorClass(unittest.TestCase): @@ -238,6 
+271,40 @@ def test_tempo_online(self): [86.95652174, 0.2248635], [58.25242718, 0.1878183]])) + def test_process(self): + hist, delays = self.processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 0.772242703961)) + self.assertTrue(np.allclose(hist.min(), 0.0550745515184)) + self.assertTrue(np.allclose(hist.argmax(), 11)) + self.assertTrue(np.allclose(hist.argmin(), 103)) + self.assertTrue(np.allclose(np.sum(hist), 28.4273056042)) + self.assertTrue(np.allclose(np.mean(hist), 0.223837052001)) + self.assertTrue(np.allclose(np.median(hist), 0.147368463433)) + + def test_process_online(self): + with self.assertRaises(NotImplementedError): + self.online_processor(act) + result = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + # the final result must be the same as for offline processing + hist, delays = result[-1] + hist_, delays_ = self.processor(act) + self.assertTrue(np.allclose(hist, hist_)) + self.assertTrue(np.allclose(delays, delays_)) + # result after 100 frames + hist, delays = result[99] + print(hist.max(), hist.min(), hist.argmax(), hist.argmin()) + self.assertTrue(np.allclose(hist.max(), 0.19544739526)) + self.assertTrue(np.allclose(hist.min(), 0)) + self.assertTrue(np.allclose(hist.argmax(), 46)) + self.assertTrue(np.allclose(hist.argmin(), 76)) + print(np.sum(hist), np.mean(hist), np.median(hist)) + self.assertTrue(np.allclose(np.sum(hist), 3.58546628975)) + self.assertTrue(np.allclose(np.mean(hist), 0.0282320180295)) + self.assertTrue(np.allclose(np.median(hist), 0.00471735456373)) + + class TestDBNTempoHistogramProcessorClass(unittest.TestCase): @@ -278,6 +345,28 @@ def test_tempo_online(self): tempi = [tempo_processor(np.atleast_2d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI)) + def test_process(self): + hist, delays = self.processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + 
self.assertTrue(np.allclose(hist.max(), 281)) + self.assertTrue(np.allclose(hist.min(), 0)) + self.assertTrue(np.allclose(hist.argmax(), 10)) + self.assertTrue(np.allclose(hist.argmin(), 0)) + self.assertTrue(np.allclose(np.sum(hist), 281)) + self.assertTrue(np.allclose(np.mean(hist), 2.2125984252)) + self.assertTrue(np.allclose(np.median(hist), 0)) + + def test_process_online(self): + hist, delays = self.online_processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 106)) + self.assertTrue(np.allclose(hist.min(), 0)) + self.assertTrue(np.allclose(hist.argmax(), 10)) + self.assertTrue(np.allclose(hist.argmin(), 1)) + self.assertTrue(np.allclose(np.sum(hist), 281)) + self.assertTrue(np.allclose(np.mean(hist), 2.2125984252)) + self.assertTrue(np.allclose(np.median(hist), 0)) + class TestWriteTempoFunction(unittest.TestCase): From fb0ed1483e98bca074d645a7bd6ccd10d9c6490d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Mon, 24 Jul 2017 21:54:41 +0200 Subject: [PATCH 12/18] fix memory leak in online comb filter (and make it 30% faster) --- madmom/features/tempo.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 44a422a21..350b8d214 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -320,14 +320,15 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) self.alpha = alpha if self.online: - self.combfilter_matrix = [] - self.buffer = BufferProcessor((int(hist_buffer * self.fps), - len(self.intervals))) + self._comb_buffer = BufferProcessor((self.max_interval + 1, + len(self.intervals))) + self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), + len(self.intervals))) def reset(self): """Reset to initial state.""" - self.combfilter_matrix = [] - self.buffer.reset() + 
self._comb_buffer.reset() + self._hist_buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -375,24 +376,19 @@ def process_online(self, activations, reset=True, **kwargs): self.reset() if activations.size != 1: raise NotImplementedError('can only be called frame by frame') - # expand the activation for every tau - activations = np.full(len(self.intervals), activations, dtype=np.float) - # append it to the comb filter matrix - self.combfilter_matrix.append(activations) - # online feed backward comb filter - min_tau = self.min_interval - for i in self.intervals: - if len(self.combfilter_matrix) > i: - self.combfilter_matrix[-1][i - min_tau] += self.alpha * \ - self.combfilter_matrix[-1 - i][i - min_tau] - # retrieve maxima - act_max = self.combfilter_matrix[-1] == \ - np.max(self.combfilter_matrix[-1], axis=-1) + # indices at which to retrieve y[n - τ] + idx = [-self.intervals, np.arange(len(self.intervals))] + # online feed backward comb filter (y[n] = x[n] + α * y[n - τ]) + y_n = activations + self.alpha * self._comb_buffer.buffer[idx] + # shift output buffer with new value + self._comb_buffer(y_n) + # determine the tau with the highest value + act_max = y_n == np.max(y_n, axis=-1)[..., np.newaxis] # compute the max bins - bins = self.combfilter_matrix[-1] * act_max - # use a buffer to only keep bins of the last seconds + bins = y_n * act_max + # use a buffer to only keep a certain number of bins # shift buffer and put new bins at end of buffer - bins = self.buffer(bins) + bins = self._hist_buffer(bins) # build a histogram together with the intervals and return it return np.sum(bins, axis=0), self.intervals From 6d7bfe032c27bfbf619f84b62a99b5302aa33f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Mon, 24 Jul 2017 23:15:09 +0200 Subject: [PATCH 13/18] API: change internal BufferProcessor storage from `buffer` to `data` and add numpy-compatible slicing access --- CHANGES.rst | 1 + madmom/features/tempo.py | 6 +++--- 
madmom/processors.py | 27 ++++++++++++++++++++++----- tests/test_processors.py | 12 ++++++------ 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index eeac280e1..7bd59e48c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -19,6 +19,7 @@ Bug fixes: API relevant changes: +* `BufferProcessor` uses `data` instead of `buffer` for data storage (#292) * `DBNBeatTrackingProcessor` expects 1D inputs (#299) Other changes: diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 350b8d214..67d54513c 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -379,7 +379,7 @@ def process_online(self, activations, reset=True, **kwargs): # indices at which to retrieve y[n - τ] idx = [-self.intervals, np.arange(len(self.intervals))] # online feed backward comb filter (y[n] = x[n] + α * y[n - τ]) - y_n = activations + self.alpha * self._comb_buffer.buffer[idx] + y_n = activations + self.alpha * self._comb_buffer[idx] # shift output buffer with new value self._comb_buffer(y_n) # determine the tau with the highest value @@ -475,10 +475,10 @@ def process_online(self, activations, reset=True, **kwargs): if activations.size != 1: raise NotImplementedError('can only be called frame by frame') # select relevant activations from buffer for subtraction - buf = self.buffer.buffer[self.min_interval:self.max_interval + 1] + buf = self.buffer[self.min_interval:self.max_interval + 1] # subtract oldest acf values before activations are removed from buffer # as long as the buffer is not filled this will subtract 0 - self.bins -= buf * self.buffer.buffer[0] + self.bins -= buf * self.buffer[0] # shift buffer and put new activation at end of buffer buf = self.buffer(activations) # select relevant activations from buffer for addition diff --git a/madmom/processors.py b/madmom/processors.py index 6e2e07b59..c50693378 100644 --- a/madmom/processors.py +++ b/madmom/processors.py @@ -765,7 +765,7 @@ def __init__(self, buffer_size=None, 
init=None, init_value=0): # save variables self.buffer_size = buffer_size self.init = init - self.buffer = init + self.data = init def reset(self, init=None): """ @@ -777,7 +777,7 @@ def reset(self, init=None): Reset BufferProcessor to this initial state. """ - self.buffer = init if init is not None else self.init + self.data = init if init is not None else self.init def process(self, data, **kwargs): """ @@ -802,14 +802,31 @@ def process(self, data, **kwargs): # length of the data data_length = len(data) # remove `data_length` from buffer at the beginning and append new data - self.buffer = np.roll(self.buffer, -data_length, axis=0) - self.buffer[-data_length:] = data + self.data = np.roll(self.data, -data_length, axis=0) + self.data[-data_length:] = data # return the complete buffer - return self.buffer + return self.data # alias for easier / more intuitive calling buffer = process + def __getitem__(self, index): + """ + Direct access to the buffer data. + + Parameters + ---------- + index : int, slice, ndarray, + Any NumPy indexing method to access the buffer data directly. + + Returns + ------- + numpy array or subclass thereof + Requested view of the buffered data. 
+ + """ + return self.data[index] + # function to process live input def process_online(processor, infile, outfile, **kwargs): diff --git a/tests/test_processors.py b/tests/test_processors.py index 540d4a3e7..2cd9236bb 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -30,7 +30,7 @@ class TestBufferProcessor(unittest.TestCase): def test_1d(self): buffer = BufferProcessor(5, init=np.zeros(5)) - self.assertTrue(np.allclose(buffer.buffer, 0)) + self.assertTrue(np.allclose(buffer.data, 0)) # shift in two new values result = buffer(np.arange(2)) self.assertTrue(np.allclose(result, [0, 0, 0, 0, 1])) @@ -44,9 +44,9 @@ def test_1d(self): def test_2d(self): buffer = BufferProcessor((5, 2), init=np.zeros((5, 2))) - print(buffer.buffer) - self.assertTrue(buffer.buffer.shape == (5, 2)) - self.assertTrue(np.allclose(buffer.buffer, 0)) + print(buffer.data) + self.assertTrue(buffer.data.shape == (5, 2)) + self.assertTrue(np.allclose(buffer.data, 0)) # shift in new values result = buffer(np.arange(2).reshape((1, -1))) self.assertTrue(result.shape == (5, 2)) @@ -72,11 +72,11 @@ def test_2d(self): def test_reset(self): buffer = BufferProcessor(5, init=np.ones(5)) - self.assertTrue(np.allclose(buffer.buffer, 1)) + self.assertTrue(np.allclose(buffer.data, 1)) result = buffer(np.arange(2)) self.assertTrue(np.allclose(result, [1, 1, 1, 0, 1])) buffer.reset() - self.assertTrue(np.allclose(buffer.buffer, 1)) + self.assertTrue(np.allclose(buffer.data, 1)) # clean up From 1f02859855667c4ad23457ce3078752579912419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 18 Aug 2017 09:56:37 +0200 Subject: [PATCH 14/18] comb filter online histogram can operate on any input length --- madmom/features/tempo.py | 75 +++++++++++++++++------------------- tests/test_features_tempo.py | 61 ++++++++++++++++++----------- 2 files changed, 73 insertions(+), 63 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 67d54513c..aa4ee4662 
100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -374,21 +374,21 @@ def process_online(self, activations, reset=True, **kwargs): # reset to initial state if reset: self.reset() - if activations.size != 1: - raise NotImplementedError('can only be called frame by frame') # indices at which to retrieve y[n - τ] idx = [-self.intervals, np.arange(len(self.intervals))] - # online feed backward comb filter (y[n] = x[n] + α * y[n - τ]) - y_n = activations + self.alpha * self._comb_buffer[idx] - # shift output buffer with new value - self._comb_buffer(y_n) - # determine the tau with the highest value - act_max = y_n == np.max(y_n, axis=-1)[..., np.newaxis] - # compute the max bins - bins = y_n * act_max - # use a buffer to only keep a certain number of bins - # shift buffer and put new bins at end of buffer - bins = self._hist_buffer(bins) + # iterate over all activations + for act in activations: + # online feed backward comb filter (y[n] = x[n] + α * y[n - τ]) + y_n = act + self.alpha * self._comb_buffer[idx] + # shift output buffer with new value + self._comb_buffer(y_n) + # determine the tau with the highest value + act_max = y_n == np.max(y_n, axis=-1)[..., np.newaxis] + # compute the max bins + bins = y_n * act_max + # use a buffer to only keep a certain number of bins + # shift buffer and put new bins at end of buffer + bins = self._hist_buffer(bins) # build a histogram together with the intervals and return it return np.sum(bins, axis=0), self.intervals @@ -735,33 +735,28 @@ def process_online(self, activations, reset=True, **kwargs): relative strengths (second column). 
""" - # multiple activations will result in multiple tempi - tempi = [] - # iterate over all activations - for activation in activations: - # build the tempo histogram depending on the chosen method - histogram = self.interval_histogram(activation, reset=reset) - # smooth the histogram - histogram = smooth_histogram(histogram, self.hist_smooth) - # detect the tempo and append it to the found tempi - tempo = detect_tempo(histogram, self.fps) - tempi.append(tempo) - # visualize tempo - if self.visualize: - display = '' - # display the 3 most likely tempi and their strengths - for i, display_tempo in enumerate(tempo[:3], start=1): - # display tempo - display += '| ' + str(round(display_tempo[0], 1)) + ' ' - # display strength - display += min(int(display_tempo[1] * 50), 18) * '*' - # fill up the rest with spaces - display = display.ljust(i * 26) - # print the tempi - sys.stderr.write('\r%s' % ''.join(display) + '|') - sys.stderr.flush() - # return last detected tempo - return tempi[-1] + # build the tempo histogram depending on the chosen method + histogram = self.interval_histogram(activations, reset=reset) + # smooth the histogram + histogram = smooth_histogram(histogram, self.hist_smooth) + # detect the tempo and append it to the found tempi + tempo = detect_tempo(histogram, self.fps) + # visualize tempo + if self.visualize: + display = '' + # display the 3 most likely tempi and their strengths + for i, display_tempo in enumerate(tempo[:3], start=1): + # display tempo + display += '| ' + str(round(display_tempo[0], 1)) + ' ' + # display strength + display += min(int(display_tempo[1] * 50), 18) * '*' + # fill up the rest with spaces + display = display.ljust(i * 26) + # print the tempi + sys.stderr.write('\r%s' % ''.join(display) + '|') + sys.stderr.flush() + # return tempo + return tempo def interval_histogram(self, activations, **kwargs): """ diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index 77112ba81..6a797c2a8 100644 --- 
a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -98,6 +98,7 @@ class TestTempoEstimationProcessorClass(unittest.TestCase): def setUp(self): self.processor = TempoEstimationProcessor(fps=fps) + self.online_processor = TempoEstimationProcessor(fps=fps, online=True) def test_types(self): self.assertIsInstance(self.processor.method, str) @@ -126,17 +127,16 @@ def test_process(self): self.assertTrue(np.allclose(tempi, COMB_TEMPI, atol=0.01)) def test_process_online(self): - processor = TempoEstimationProcessor(fps=fps, online=True) - tempi = [processor.process_online(np.atleast_1d(a), reset=False) - for a in act] - self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) - # with resetting results are the same - processor.reset() - tempi = [processor.process_online(np.atleast_1d(a), reset=False) + # process all activations at once + tempi = self.online_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, COMB_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same + self.online_processor.reset() + tempi = [self.online_processor(np.atleast_1d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) # without resetting results are different - tempi = [processor.process_online(np.atleast_1d(a), reset=False) + tempi = [self.online_processor(np.atleast_1d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1][:3], [[176.470588, 0.31322337], [85.7142857, 0.11437361], @@ -176,17 +176,15 @@ def test_tempo(self): def test_tempo_online(self): tempo_processor = TempoEstimationProcessor( histogram_processor=self.online_processor, fps=fps, online=True) - tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) - for a in act] - self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) - # with resetting results are the same + # process all activations at once + tempi = tempo_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, COMB_TEMPI_ONLINE)) + # 
process frame by frame; with resetting results are the same tempo_processor.reset() - tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) - for a in act] + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) # without resetting results are different - tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) - for a in act] + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1][:3], [[176.470588, 0.31322337], [85.7142857, 0.11437361], [115.384615, 0.10919612]])) @@ -203,8 +201,15 @@ def test_process(self): self.assertTrue(np.allclose(np.median(hist), 1.48112542203)) def test_process_online(self): - with self.assertRaises(NotImplementedError): - self.online_processor(act) + # offline results + hist_offline, delays_offline = self.processor(act) + # calling with all activations at once + hist, delays = self.online_processor(act) + # result must be the same as for offline processing + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + # calling frame by frame after resetting + self.online_processor.reset() result = [self.online_processor(np.atleast_1d(a), reset=False) for a in act] # the final result must be the same as for offline processing @@ -214,15 +219,28 @@ def test_process_online(self): self.assertTrue(np.allclose(delays, delays_)) # result after 100 frames hist, delays = result[99] - print(hist.max(), hist.min(), hist.argmax(), hist.argmin()) self.assertTrue(np.allclose(hist.max(), 2.03108930086)) self.assertTrue(np.allclose(hist.min(), 1.23250838113)) self.assertTrue(np.allclose(hist.argmax(), 12)) self.assertTrue(np.allclose(hist.argmin(), 44)) - print(np.sum(hist), np.mean(hist), np.median(hist)) self.assertTrue(np.allclose(np.sum(hist), 175.034206851)) self.assertTrue(np.allclose(np.mean(hist), 1.37822210119)) 
self.assertTrue(np.allclose(np.median(hist), 1.23250838113)) + # the final result must be the same as for offline processing + hist, delays = result[-1] + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + # results must be different without resetting + result = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + hist, delays = result[-1] + self.assertTrue(np.allclose(hist.max(), 18.1385269354)) + self.assertTrue(np.allclose(hist.min(), 1.23250838113)) + self.assertTrue(np.allclose(hist.argmax(), 11)) + self.assertTrue(np.allclose(hist.argmin(), 72)) + self.assertTrue(np.allclose(np.sum(hist), 332.668525522)) + self.assertTrue(np.allclose(np.mean(hist), 2.61943720884)) + self.assertTrue(np.allclose(np.median(hist), 1.96220625848)) class TestACFTempoHistogramProcessorClass(unittest.TestCase): @@ -294,18 +312,15 @@ def test_process_online(self): self.assertTrue(np.allclose(delays, delays_)) # result after 100 frames hist, delays = result[99] - print(hist.max(), hist.min(), hist.argmax(), hist.argmin()) self.assertTrue(np.allclose(hist.max(), 0.19544739526)) self.assertTrue(np.allclose(hist.min(), 0)) self.assertTrue(np.allclose(hist.argmax(), 46)) self.assertTrue(np.allclose(hist.argmin(), 76)) - print(np.sum(hist), np.mean(hist), np.median(hist)) self.assertTrue(np.allclose(np.sum(hist), 3.58546628975)) self.assertTrue(np.allclose(np.mean(hist), 0.0282320180295)) self.assertTrue(np.allclose(np.median(hist), 0.00471735456373)) - class TestDBNTempoHistogramProcessorClass(unittest.TestCase): def setUp(self): From 0beddf3501568d8cdd3e25b7871e00453a41b1c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 18 Aug 2017 11:28:06 +0200 Subject: [PATCH 15/18] ACF online histogram can operate on any input length This variant is slower than the previous one but is more flexible. 
--- madmom/features/tempo.py | 36 +++++++++++++++++------------------- tests/test_features_tempo.py | 29 +++++++++++++++++++---------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index aa4ee4662..6dd94be88 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -419,13 +419,14 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, super(ACFTempoHistogramProcessor, self).__init__( min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) if self.online: - self.bins = np.zeros(len(self.intervals)) - self.buffer = BufferProcessor(int(hist_buffer * self.fps)) + self._act_buffer = BufferProcessor((self.max_interval + 1, 1)) + self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), + len(self.intervals))) def reset(self): """Reset to initial state.""" - self.bins = np.zeros(len(self.intervals)) - self.buffer.reset() + self._act_buffer.reset() + self._hist_buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -472,21 +473,18 @@ def process_online(self, activations, reset=True, **kwargs): # reset to initial state if reset: self.reset() - if activations.size != 1: - raise NotImplementedError('can only be called frame by frame') - # select relevant activations from buffer for subtraction - buf = self.buffer[self.min_interval:self.max_interval + 1] - # subtract oldest acf values before activations are removed from buffer - # as long as the buffer is not filled this will subtract 0 - self.bins -= buf * self.buffer[0] - # shift buffer and put new activation at end of buffer - buf = self.buffer(activations) - # select relevant activations from buffer for addition - buf = buf[-self.max_interval - 1:-self.min_interval] - # add new acf values to bins - self.bins += np.flipud(buf * activations) - # return histogram - return np.array(self.bins), self.intervals + # iterate over all activations + # TODO: speed this up! 
+ for act in activations: + # online ACF (y[n] = x[n] * x[n - τ]) + bins = act * self._act_buffer[-self.intervals].T + # shift activation buffer with new value + self._act_buffer(act) + # use a buffer to only keep a certain number of bins + # shift buffer and put new bins at end of buffer + bins = self._hist_buffer(bins) + # build a histogram together with the intervals and return it + return np.sum(bins, axis=0), self.intervals class DBNTempoHistogramProcessor(TempoHistogramProcessor): diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index 6a797c2a8..21255ec9f 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -274,17 +274,15 @@ def test_tempo(self): def test_tempo_online(self): tempo_processor = TempoEstimationProcessor( histogram_processor=self.online_processor, fps=fps, online=True) - tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) - for a in act] - self.assertTrue(np.allclose(tempi[-1], ACF_TEMPI_ONLINE)) - # with resetting results are the same + # process all activations at once + tempi = tempo_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, ACF_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same tempo_processor.reset() - tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) - for a in act] + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1], ACF_TEMPI_ONLINE)) # without resetting results are different - tempi = [tempo_processor.process_online(np.atleast_1d(a), reset=False) - for a in act] + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] self.assertTrue(np.allclose(tempi[-1][:3], [[176.4705882, 0.2414368], [86.95652174, 0.2248635], [58.25242718, 0.1878183]])) @@ -301,8 +299,15 @@ def test_process(self): self.assertTrue(np.allclose(np.median(hist), 0.147368463433)) def test_process_online(self): - with self.assertRaises(NotImplementedError): - 
self.online_processor(act) + # offline results + hist_offline, delays_offline = self.processor(act) + # calling with all activations at once + hist, delays = self.online_processor(act) + # result must be the same as for offline processing + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + # calling frame by frame after resetting + self.online_processor.reset() result = [self.online_processor(np.atleast_1d(a), reset=False) for a in act] # the final result must be the same as for offline processing @@ -319,6 +324,10 @@ def test_process_online(self): self.assertTrue(np.allclose(np.sum(hist), 3.58546628975)) self.assertTrue(np.allclose(np.mean(hist), 0.0282320180295)) self.assertTrue(np.allclose(np.median(hist), 0.00471735456373)) + # the final result must be the same as for offline processing + hist, delays = result[-1] + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) class TestDBNTempoHistogramProcessorClass(unittest.TestCase): From 410dfe1d531aadd3c67f58fc0e978ad88e1ab31e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 18 Aug 2017 13:22:49 +0200 Subject: [PATCH 16/18] use a histogram buffer also for DBN tempo estimation --- madmom/features/tempo.py | 22 ++++++++++++++-------- tests/test_features_tempo.py | 22 ++++++++++++++++------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 6dd94be88..c7ad8b134 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -497,6 +497,8 @@ class DBNTempoHistogramProcessor(TempoHistogramProcessor): Minimum tempo to detect [bpm]. max_bpm : float, optional Maximum tempo to detect [bpm]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. fps : float, optional Frames per second. 
online : bool, optional @@ -504,8 +506,8 @@ class DBNTempoHistogramProcessor(TempoHistogramProcessor): """ - def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, - online=False, **kwargs): + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, + hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(DBNTempoHistogramProcessor, self).__init__( min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) @@ -513,10 +515,14 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, fps=None, self.dbn = DBNBeatTrackingProcessor( min_bpm=self.min_bpm, max_bpm=self.max_bpm, fps=self.fps, online=online, **kwargs) + if self.online: + self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), + len(self.intervals))) def reset(self): """Reset DBN to initial state.""" self.dbn.hmm.reset() + self._hist_buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -574,13 +580,13 @@ def process_online(self, activations, reset=True, **kwargs): # choose the best state for each step states = np.argmax(fwd, axis=1) intervals = self.dbn.st.state_intervals[states] - # get the counts of the bins - bins = np.bincount(intervals, - minlength=self.dbn.st.intervals.max() + 1) - # truncate everything below the minimum interval of the state space - bins = bins[self.dbn.st.intervals.min():] + # convert intervals to bins + bins = np.zeros((len(activations), len(self.intervals))) + bins[np.arange(len(activations)), intervals - self.min_interval] = 1 + # shift buffer and put new bins at end of buffer + bins = self._hist_buffer(bins) # build a histogram together with the intervals and return it - return bins, self.dbn.st.intervals + return np.sum(bins, axis=0), self.intervals class TempoEstimationProcessor(OnlineProcessor): diff --git a/tests/test_features_tempo.py b/tests/test_features_tempo.py index 21255ec9f..a5e145512 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -32,6 +32,9 @@ 
[115.384615, 0.0749783568], [69.7674419, 0.0599632291], [50.4201681, 0.0535384559]] DBN_TEMPI = np.array([[176.470, 1]]) +DBN_TEMPI_ONLINE = [[176.470588, 0.580877380], [86.9565217, 0.244729904], + [74.0740741, 0.127887992], [40.8163265, 0.0232523621], + [250.000000, 0.0232523621]] HIST = interval_histogram_comb(act, 0.79, min_tau=24, max_tau=150) @@ -361,13 +364,20 @@ def test_tempo(self): def test_tempo_online(self): tempo_processor = TempoEstimationProcessor( histogram_processor=self.online_processor, fps=fps, online=True) - # TODO: fix requirement for atleast_2d - tempi = [tempo_processor(np.atleast_2d(a), reset=False) for a in act] - self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI)) - # with resetting results are the same + # process all activations at once + tempi = tempo_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, DBN_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same + tempo_processor.reset() tempo_processor.reset() - tempi = [tempo_processor(np.atleast_2d(a), reset=False) for a in act] - self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI)) + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI_ONLINE)) + # without resetting results are different + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], + [[176.4705882, 0.472499032], + [84.5070423, 0.432130320], + [74.0740741, 0.0699384753]])) def test_process(self): hist, delays = self.processor(act) From ceb5c7437fa3abfbe784a956019027f13755333b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20B=C3=B6ck?= Date: Fri, 18 Aug 2017 13:34:44 +0200 Subject: [PATCH 17/18] refactor histogram buffer to base class --- madmom/features/tempo.py | 45 ++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index c7ad8b134..da88433d5 100755 --- 
a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -254,6 +254,8 @@ class TempoHistogramProcessor(OnlineProcessor): Minimum tempo to detect [bpm]. max_bpm : float Maximum tempo to detect [bpm]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. fps : float, optional Frames per second. @@ -268,12 +270,17 @@ class TempoHistogramProcessor(OnlineProcessor): """ - def __init__(self, min_bpm, max_bpm, fps=None, online=False, **kwargs): + def __init__(self, min_bpm, max_bpm, hist_buffer=HIST_BUFFER, fps=None, + online=False, **kwargs): # pylint: disable=unused-argument super(TempoHistogramProcessor, self).__init__(online=online) self.min_bpm = min_bpm self.max_bpm = max_bpm + self.hist_buffer = hist_buffer self.fps = fps + if self.online: + self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), + len(self.intervals))) @property def min_interval(self): @@ -290,6 +297,10 @@ def intervals(self): """Beat intervals [frames].""" return np.arange(self.min_interval, self.max_interval + 1) + def reset(self): + """Reset the tempo histogram aggregation buffer.""" + self._hist_buffer.reset() + class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): """ @@ -303,9 +314,8 @@ class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): Maximum tempo to detect [bpm]. alpha : float, optional Scaling factor for the comb filter. - hist_buffer : float, optional - Use a buffer of this size to sum the max. bins in online mode - [seconds]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. fps : float, optional Frames per second. 
online : bool, optional @@ -317,18 +327,17 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(CombFilterTempoHistogramProcessor, self).__init__( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, hist_buffer=hist_buffer, fps=fps, + online=online, **kwargs) self.alpha = alpha if self.online: self._comb_buffer = BufferProcessor((self.max_interval + 1, len(self.intervals))) - self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), - len(self.intervals))) def reset(self): """Reset to initial state.""" + super(CombFilterTempoHistogramProcessor, self).reset() self._comb_buffer.reset() - self._hist_buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -403,9 +412,8 @@ class ACFTempoHistogramProcessor(TempoHistogramProcessor): Minimum tempo to detect [bpm]. max_bpm : float, optional Maximum tempo to detect [bpm]. - hist_buffer : float, optional - Use a buffer of this size for the activations to calculate the - auto-correlation function [seconds]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. fps : float, optional Frames per second. 
online : bool, optional @@ -417,16 +425,15 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(ACFTempoHistogramProcessor, self).__init__( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, hist_buffer=hist_buffer, fps=fps, + online=online, **kwargs) if self.online: self._act_buffer = BufferProcessor((self.max_interval + 1, 1)) - self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), - len(self.intervals))) def reset(self): """Reset to initial state.""" + super(ACFTempoHistogramProcessor, self).reset() self._act_buffer.reset() - self._hist_buffer.reset() def process_offline(self, activations, **kwargs): """ @@ -510,19 +517,17 @@ def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): # pylint: disable=unused-argument super(DBNTempoHistogramProcessor, self).__init__( - min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, **kwargs) + min_bpm=min_bpm, max_bpm=max_bpm, hist_buffer=hist_buffer, fps=fps, + online=online, **kwargs) from .beats import DBNBeatTrackingProcessor self.dbn = DBNBeatTrackingProcessor( min_bpm=self.min_bpm, max_bpm=self.max_bpm, fps=self.fps, online=online, **kwargs) - if self.online: - self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), - len(self.intervals))) def reset(self): """Reset DBN to initial state.""" + super(DBNTempoHistogramProcessor, self).reset() self.dbn.hmm.reset() - self._hist_buffer.reset() def process_offline(self, activations, **kwargs): """ From b4d53811d074f82c67d54eb67c9ee0ec934cc55b Mon Sep 17 00:00:00 2001 From: SebastianPoell Date: Wed, 6 Sep 2017 02:23:49 +0200 Subject: [PATCH 18/18] Multi Agent Beat Tracker --- madmom/features/beats.py | 374 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 373 insertions(+), 1 deletion(-) diff --git a/madmom/features/beats.py 
class MultiAgentBeatTrackingProcessor(OnlineProcessor):
    """
    Beat tracking via multiple agents.

    Oriented towards the paper "Beat Tracking for multiple applications: A
    multi-agent system architecture with state recovery" by Lobato Oliveira
    et al., 2012.

    Parameters
    ----------
    fps : float, optional
        Frames per second.
    tempo_estimator : :class:`TempoEstimationProcessor`, optional
        Use this processor to estimate the tempi the agents are inducted
        with. If 'None' a default tempo estimator will be created and used.
    online : bool, optional
        Operate in online mode (activations are buffered frame by frame).
    kwargs : dict, optional
        Keyword arguments passed to
        :class:`madmom.features.tempo.TempoEstimationProcessor` if no
        `tempo_estimator` was given.

    Notes
    -----
    The tracker is configured through the class attributes `MAX_AGENTS`
    (maximum number of agents kept alive), `NUM_TEMPI` (number of tempo
    hypotheses used for induction) and `INDUCTION_TIME` (length of the
    induction window in seconds).

    """
    MAX_AGENTS = 30
    NUM_TEMPI = 3
    INDUCTION_TIME = 2.

    class Agent(object):
        """
        Agent class for tracking the beats. Each agent has a tempo (beat
        interval), a score, a prediction where the next beat is and the
        history of all beats it accepted so far.

        Parameters
        ----------
        score : float, optional
            Initial score of the agent.
        interval : int, optional
            Beat interval [frames].
        prediction : int, optional
            Predicted position of the next beat [frames].
        beats : list, optional
            Positions of the beats accepted so far [frames].

        Notes
        -----
        The behaviour is tuned through the class attributes `INNER_WINDOW`
        (frames to each side of the prediction in which a beat is accepted),
        `OUTER_WINDOW` (outer window to each side as a factor of the
        interval), `THRESHOLD` (minimum activation to accept a beat),
        `CORRECTION_FACTOR` (how much an agent adapts to its error) and
        `INHERIT_SCORE_FACTOR` (fraction of the score inherited by children).

        """
        # TODO: Should we use @classmethod to set all values from outside?
        INNER_WINDOW = 5
        OUTER_WINDOW = 0.4
        THRESHOLD = 0.05
        CORRECTION_FACTOR = 0.25
        INHERIT_SCORE_FACTOR = 0.9

        # used for normalizing the score and limiting forked agents to the
        # tempo range, set from outside (shared by all agents)
        _MAX_INTERVAL = None
        _MIN_INTERVAL = None

        def __init__(self, score=0, interval=0, prediction=0, beats=None):
            self.score = score
            self.interval = interval
            self.prediction = prediction
            # never share the (mutable) beat history between agents
            self.beats = [] if beats is None else list(beats)

        def __hash__(self):
            # must agree with __eq__: same prediction & interval, same hash
            return hash((self.prediction, self.interval))

        def __eq__(self, other):
            # TODO: Maybe allow for slight variations here
            if not isinstance(other, MultiAgentBeatTrackingProcessor.Agent):
                return NotImplemented
            return (self.prediction == other.prediction and
                    self.interval == other.interval)

        def __ne__(self, other):
            # Python 2 does not derive != from ==
            result = self.__eq__(other)
            return result if result is NotImplemented else not result

        def fork(self, error):
            """
            Return child agents based on the given error. The children
            inherit a part of the parent's score and all of the parent's
            detections. The interval and the prediction for the next beat
            are adjusted to the error of the last prediction.

            Parameters
            ----------
            error : int
                Distance between the strongest activation and the predicted
                beat position [frames].

            Returns
            -------
            agents : list
                Child agents whose interval is within the tempo range.

            """
            # TODO: Check whether all children are really needed
            half = int(error * 0.5)
            inherited = self.score * self.INHERIT_SCORE_FACTOR
            # (interval adjustment, prediction adjustment) of the children:
            # same tempo / adjusted tempo / tempo adjusted by half the error
            adjustments = [(0, error), (error, error), (half, half)]
            children = [self.__class__(score=inherited,
                                       interval=self.interval + d_interval,
                                       prediction=self.prediction + d_pred,
                                       beats=self.beats)
                        for d_interval, d_pred in adjustments]
            # only return agents which are within tempo range
            return [child for child in children if
                    self._MIN_INTERVAL <= child.interval <= self._MAX_INTERVAL]

        def accept(self, activation, idx):
            """
            Accept beat at global frame position idx if the
            activation exceeds the threshold.

            Parameters
            ----------
            activation : float
                Activation value backing the beat.
            idx : int
                Global frame position of the beat.

            """
            if activation > self.THRESHOLD:
                # rebind instead of appending, the history of the parent of
                # a forked agent must stay untouched
                self.beats = self.beats + [idx]

        def process(self, activations, idx):
            """
            Set the next prediction, score the agent and create
            new child agents if necessary. This method is called after
            the outer window has passed for offline and online.

            Parameters
            ----------
            activations : numpy array or list
                Activation window which surrounds the prediction by
                outer window length on both sides. For online mode we wait
                until all of that information is available before calling
                this method.
            idx : int
                Global frame index marking the end of the activation window,
                everything is calculated in absolute frame positions.

            Returns
            -------
            agents : list
                New child agent objects

            """
            # nothing to evaluate, just move on to the next beat
            if len(activations) == 0:
                self.prediction += self.interval
                return []
            # global frame position of the first activation of the window
            window_start = idx - len(activations)
            # frames to look around, at least 1 to be usable as a divisor
            frames = max(1, int(self.interval * self.OUTER_WINDOW))
            # get predicted activation; the position is relative to the
            # window start (and clipped, the window may be truncated)
            position = int(self.prediction - window_start)
            position = min(max(position, 0), len(activations) - 1)
            act = activations[position]
            # get max index within outer window
            max_idx = window_start + np.argmax(activations)
            # distance between max activation and predicted position
            error = max_idx - self.prediction
            # faster agents should not get a better score
            normalization = self.interval / float(self._MIN_INTERVAL)
            # was no beat accepted inside this window yet? (for offline)
            no_beat_yet = not self.beats or self.beats[-1] < window_start
            # if max activation was in inner window
            if abs(error) <= self.INNER_WINDOW:
                if no_beat_yet:
                    self.accept(max(activations), max_idx)
                # update prediction
                self.prediction = max_idx + self.interval
                # reward agent for detecting the beat
                self.score += ((1 - abs(error) / float(frames)) *
                               normalization * act)
                # adapt agent to error
                self.interval += int(error * self.CORRECTION_FACTOR)
                self.prediction += int(error * self.CORRECTION_FACTOR)
                # return no new child agents
                return []
            # otherwise the max activation was in the outer window
            if no_beat_yet:
                self.accept(act, self.prediction)
            # update prediction
            self.prediction += self.interval
            # create child agents (before penalizing, they inherit the
            # unpenalized score)
            new_agents = self.fork(error)
            # penalize agent for not detecting the beat
            self.score -= (abs(error) / float(frames)) * normalization * act
            # return new child agents
            return new_agents

    def __init__(self, fps=None, tempo_estimator=None, online=False, **kwargs):
        # pylint: disable=unused-argument
        super(MultiAgentBeatTrackingProcessor, self).__init__(online=online)
        # save variables
        self.fps = fps
        # tempo estimator
        if tempo_estimator is None:
            # import the TempoEstimation here otherwise we have a loop
            from .tempo import TempoEstimationProcessor
            # create default tempo estimator
            tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs)
        self.tempo_estimator = tempo_estimator
        self.agents = []
        # TODO: Not sure if thats nice?
        # NOTE: these are class attributes, i.e. shared by all instances
        self.Agent._MIN_INTERVAL = tempo_estimator.min_interval
        self.Agent._MAX_INTERVAL = tempo_estimator.max_interval
        # the activation buffer is needed only in online mode
        self.buffer = None
        if self.online:
            self.visualize = kwargs.get('verbose', False)
            self.buffer = BufferProcessor(self.induction_frames)
        self.last_beat = 0
        self.counter = 0

    @property
    def induction_frames(self):
        """Length of the induction window [frames]."""
        return int(self.INDUCTION_TIME * self.fps)

    def reset(self):
        """Reset the MultiAgentBeatTrackingProcessor."""
        # the buffer exists only in online mode
        if self.buffer is not None:
            self.buffer.reset()
        self.agents = []
        self.last_beat = 0
        self.counter = 0

    def _induction_intervals(self, activations):
        """
        Determine the beat intervals used for inducting agents.

        Parameters
        ----------
        activations : numpy array
            Activations of the induction window.

        Returns
        -------
        intervals : numpy array
            Intervals [frames] of the `NUM_TEMPI` most likely tempi.

        """
        from .tempo import detect_tempo
        # create an interval histogram of the induction window
        histogram = self.tempo_estimator.interval_histogram(activations)
        # get N most likely tempi (atleast_2d guards against a single
        # [tempo, strength] pair being returned as a 1D array)
        tempi = np.atleast_2d(detect_tempo(histogram, self.fps))
        tempi = tempi[:self.NUM_TEMPI, 0]
        # ignore undefined tempi, they can not be converted to intervals
        tempi = tempi[np.isfinite(tempi)]
        tempi = tempi[tempi > 0]
        # convert tempi to intervals
        intervals = (60.0 * self.fps / tempi).astype(int)
        return intervals[intervals > 0]

    def _prune_agents(self, new_agents):
        """
        Add the new agents, rank all agents by score, remove duplicates and
        keep only the `MAX_AGENTS` best ones.

        Parameters
        ----------
        new_agents : list
            Agents to be added before pruning.

        """
        # append new agents to agents list
        self.agents.extend(new_agents)
        # sort all agents by score
        self.agents.sort(key=lambda a: a.score, reverse=True)
        # remove duplicates by using the agents __eq__ method; this keeps
        # the (score) order explicitly instead of relying on ordered dicts
        seen = set()
        unique = []
        for agent in self.agents:
            if agent not in seen:
                seen.add(agent)
                unique.append(agent)
        # kill worst agents if too many
        self.agents = unique[:self.MAX_AGENTS]

    def process_offline(self, activations, **kwargs):
        """
        Detect the beats in the given activation function.

        Parameters
        ----------
        activations : numpy array
            Beat activation function.

        Returns
        -------
        beats : numpy array
            Detected beat positions [seconds].

        """
        # start from scratch, agents of previous calls must not interfere
        self.agents = []
        # smooth activations
        act_smooth = int(self.fps * self.tempo_estimator.act_smooth)
        activations = smooth_signal(activations, act_smooth)
        # induct agents for the most likely intervals of the induction window
        induction_window = activations[:self.induction_frames]
        for interval in self._induction_intervals(induction_window):
            self.induct_agents(activations[:interval], interval)
        # iterate through all activations
        for idx in range(len(activations)):
            # process activation for each agent
            new_agents = []
            for agent in self.agents:
                # calculate number of frames to look around the prediction
                # TODO: Should this be calculated as a @property in agent?
                outer_frames = int(agent.interval * agent.OUTER_WINDOW)
                # skip if not the time yet for this agent
                if agent.prediction + outer_frames != idx:
                    continue
                # get activations of outer windows surrounding the prediction
                context = activations[max(0, idx - outer_frames * 2):idx]
                # process agent and extend new agents
                new_agents.extend(agent.process(context, idx))
            # add new agents, rank all agents and kill the worst ones
            self._prune_agents(new_agents)
        # without any agent there are no beats
        if not self.agents:
            return np.empty(0)
        # return beats of best agent
        return np.array(self.agents[0].beats) / float(self.fps)

    def process_online(self, activations, reset=True, **kwargs):
        """
        Detect the beats in the given activation function for online mode.

        Parameters
        ----------
        activations : numpy array
            Beat activation function.
        reset : bool, optional
            Reset the MultiAgentBeatTrackingProcessor to its initial state
            before processing.

        Returns
        -------
        beats : numpy array
            Detected beat positions [seconds].

        """
        # reset to initial state
        if reset:
            self.reset()
        beats_ = []
        for activation in activations:
            # shift buffer and put new activation at end of buffer
            window = self.buffer(activation)
            # induct agents after induction time has passed; compare with
            # the integer buffer length, a fractional number of frames
            # would never be hit by the frame counter
            if self.counter == self.induction_frames:
                # induct agents on past interval frames
                for interval in self._induction_intervals(window):
                    self.induct_agents(window[-interval:], interval,
                                       self.counter - interval)
            # guess beat if possible for each agent
            for agent in self.agents:
                # skip if beat was already detected inside this inner window
                if agent.beats and \
                        agent.beats[-1] > self.counter - agent.INNER_WINDOW * 2:
                    continue
                # skip if not the time yet for this agent to guess
                if not (agent.prediction <= self.counter <=
                        agent.prediction + agent.INNER_WINDOW):
                    continue
                # get max activation of the past inner window
                max_act = max(window[-agent.INNER_WINDOW:])
                # accept the current frame as a beat
                agent.accept(max_act, self.counter)
            # set score and predictions deferred after outer window has passed
            # this way we get a little peek into the future
            # NOTE(review): the buffer already contains the current frame,
            #               whereas the offline window ends one frame before
            #               `idx`; looks like a one frame offset -- confirm
            new_agents = []
            for agent in self.agents:
                # calculate number of frames to look around the prediction
                outer_frames = int(agent.interval * agent.OUTER_WINDOW)
                # skip if not the time yet for this agent
                if agent.prediction + outer_frames != self.counter:
                    continue
                # get activations of outer windows surrounding the prediction
                # (explicit start index, `[-0:]` would be the whole buffer)
                first = max(0, len(window) - outer_frames * 2)
                context = window[first:]
                # process agent and extend new agents
                new_agents.extend(agent.process(context, self.counter))
            # add new agents, rank all agents and kill the worst ones
            self._prune_agents(new_agents)
            # if best agent found a beat this frame
            best = self.agents[0] if self.agents else None
            is_beat = (best is not None and len(best.beats) > 0 and
                       best.beats[-1] == self.counter)
            # beats have to lie apart at least min_interval
            beat_distance = self.counter - self.tempo_estimator.min_interval
            # if current frame is considered a beat return it as result
            if is_beat and self.last_beat < beat_distance:
                beats_.append(self.counter)
                self.last_beat = self.counter
            # increase frame counter
            self.counter += 1
        # return beat(s)
        return np.array(beats_) / float(self.fps)

    def induct_agents(self, activations, interval, start=0):
        """
        Introduce agents with a given interval by letting them start at
        the biggest N maxima inside the given activations.

        Parameters
        ----------
        activations : numpy array or list
            Activation function window where agents should be introduced.
        interval : int
            Time interval which the introduced agents should have.
        start : int, optional
            Global frame number where agents start.

        """
        from scipy.signal import argrelextrema
        activations = np.asarray(activations)
        # no activations, no agents
        if activations.size == 0:
            return
        # get all maxima within activations window
        maxima = argrelextrema(activations, np.greater)[0]
        # if no maxima could be found just use max value
        if len(maxima) == 0:
            maxima = np.array([activations.argmax()])
        # pick N maxima indices where activation is highest
        best_idx = activations[maxima].argsort(axis=0)[::-1][:self.MAX_AGENTS]
        # for each best maxima init an agent
        for max_idx in maxima[best_idx]:
            new_agent = self.Agent(score=activations[max_idx],
                                   interval=interval,
                                   prediction=start + max_idx + interval,
                                   beats=[start + max_idx])
            # append new agent to agents list
            # TODO: Should this method return the agents instead of appending?
            self.agents.append(new_agent)