Merge branch 'spectral-envelope'

* spectral-envelope:
  LIB: LiveDecoderSource: make set_portamento_freq pure virtual
    Every source should implement it.
  LIB: MorphSourceModule: ignore set_portamento_freq() for now
  JACK: ignore set_portamento_freq for live decoder source
  TESTS: ignore set_portamento_freq for live decoder sources
  LIB: EffectDecoderSource: support set_portamento_freq
  LIB: MorphGridModule: whitespace fix
  TODO+++
  LIB: Encoder: use fundamental frequency estimation from AudioTool
  LIB: AudioBlock: use fundamental frequency estimation from AudioTool
  LIB: AudioTool: add fundamental frequency estimation algorithm
    This used to be part of AudioBlock, but having it available in AudioTool makes it possible to estimate a fundamental frequency without AudioBlock.
  LIB: MorphWavSource: remove unused property names
  LIB: VoiceSource: use const fuzzy freq for each frac [0..1] range
  LIB: VoiceSource: randomize start value of fuzzy frac to range [0..1]
  LIB: Encoder: merge window_weight computations of different steps
  LIB: WavData: remove obsolete FIXME
  LIB: VoiceSource: optimize detune factor generation
  LIB: VoiceSource: set maximum number of resynthesis partials to 1000
  LIB: VoiceSource: generate detune factors as needed (performance)
  LIB: VoiceSource: compute number of resynth partials before loop
  LIB: VoiceSource: generate detune factors on demand (performance)
  LIB: VoiceSource: avoid generating inaudible partials (performance)
  LIB: VoiceSource: optimize magnitude normalization step
  LIB: WavSource: remove formants/spectral mode
  LIB: VoiceSource: avoid allocations in process_block()
  LIB: VoiceSource: remove FFT related member variables
  LIB: WavSource: Resynthesis: use required number of partials (performance)
  LIB: MorphWavSourceModule: remove debugging output
  LIB: WavSource: hard code fuzzy resynth freq and max fuzzy resynth
  GLUI: MorphWavSourceView: only show fuzzy resynth in formant resynth mode
  LIB: WavSource: use percent parameter for fuzzy resynth
  TESTS: testmidisynth: support pitch expression in scripts
  LIB: WavSetBuilder: adapt block fundamental during auto tune
  LIB: AudioTool: update spectral envelope parameters properly
  GLUI: MorphWavSourceView: add UI for formant correction
  LIB: MorphWavSourceModule: implement formant correction
  LIB: MorphWavSource: add config for formant correction
  LIB: MorphLinearModule: implement set_portamento_freq()
  LIB: MorphGridModule: implement set_portamento_freq
  LIB: LiveDecoder: provide portamento frequency to source
  LIB: Encoder: estimate block spectral envelope
  LIB: AudioBlock: add fields for spectral envelope and block f0

Signed-off-by: Stefan Westerfeld <[email protected]>
swesterfeld · Apr 10, 2024 · 7285766 · 7285766
2 parents 37863e9 + 5dc7bd1
commit 7285766
Show file tree

Hide file tree

Showing 28 changed files with 495 additions and 79 deletions.
diff --git a/TODO b/TODO
@@ -1,4 +1,11 @@
 NEWER TODOS:
+VoiceSource:
+- better names for WavSource formant correction
+- better names inside VoiceSource; rename VoiceSource
+- [maybe] support formant correction for Source
+- experiments for handling freq < 1 in spectral envelope
+- experiments for start phase randomization
+Other:
 - portamento should also affect filter cutoff (key tracking)
 - 1-instrument-wav-source contains the data - not a good idea!
 - LV2 archive storage doesn't seem to work

diff --git a/glui/smmorphwavsourceview.cc b/glui/smmorphwavsourceview.cc
@@ -51,6 +51,12 @@ MorphWavSourceView::MorphWavSourceView (Widget *parent, MorphWavSource *morph_wa
   // POSITION
   pv_position = add_property_view (MorphWavSource::P_POSITION, op_layout);
 
+  // FORMANT CORRECT
+  auto pv_formant_correct = add_property_view (MorphWavSource::P_FORMANT_CORRECT, op_layout);
+  prop_formant_correct = pv_formant_correct->property();
+  connect (prop_formant_correct->signal_value_changed, this, &MorphWavSourceView::update_visible);
+  pv_fuzzy_resynth = add_property_view (MorphWavSource::P_FUZZY_RESYNTH, op_layout);
+
   update_visible();
 
   instrument_label->set_x (0);
@@ -272,6 +278,9 @@ MorphWavSourceView::update_visible()
   bool custom_position = (prop_play_mode->get() == MorphWavSource::PLAY_MODE_CUSTOM_POSITION);
   pv_position->set_visible (custom_position);
 
+  bool resynth = (prop_formant_correct->get() == MorphWavSource::FORMANT_RESYNTH);
+  pv_fuzzy_resynth->set_visible (resynth);
+
   op_layout.activate();
   signal_size_changed();
 }
diff --git a/glui/smmorphwavsourceview.hh b/glui/smmorphwavsourceview.hh
@@ -26,8 +26,10 @@ class MorphWavSourceView : public MorphOperatorView
   UserInstrumentIndex *user_instrument_index = nullptr;
   std::unique_ptr<Instrument> edit_instrument; // temporary copy used for editing
 
-  Property         *prop_play_mode;
-  PropertyView     *pv_position;
+  Property         *prop_play_mode = nullptr;
+  PropertyView     *pv_position = nullptr;
+  Property         *prop_formant_correct = nullptr;
+  PropertyView     *pv_fuzzy_resynth = nullptr;
   OperatorLayout    op_layout;
 
   void on_edit();

diff --git a/jack/smsimplejackplayer.cc b/jack/smsimplejackplayer.cc
@@ -20,6 +20,7 @@ class Source : public LiveDecoderSource
   void retrigger (int, float, int) override;
   Audio* audio() override;
   bool rt_audio_block (size_t index, RTAudioBlock& out_block) override;
+  void set_portamento_freq (float freq) override;
 };
 
 Source::Source (Audio *audio) :
@@ -52,6 +53,12 @@ Source::rt_audio_block (size_t index, RTAudioBlock& out_block)
     }
 }
 
+void
+Source::set_portamento_freq (float freq)
+{
+  // ignore
+}
+
 }
 
 static int

diff --git a/lib/smaudio.cc b/lib/smaudio.cc
@@ -8,6 +8,7 @@
 #include "smmemout.hh"
 #include "smmmapin.hh"
 #include "smwavsetrepo.hh"
+#include "smaudiotool.hh"
 #include <fcntl.h>
 #include <errno.h>
 #include <stdio.h>
@@ -144,6 +145,13 @@ SpectMorph::Audio::load (GenericIn *file, AudioLoadOptions load_options)
               else
                 printf ("unhandled float %s  %s\n", section.c_str(), ifile.event_name().c_str());
             }
+          else if (section == "frame")
+            {
+              if (ifile.event_name() == "env_f0")
+                audio_block->env_f0 = ifile.event_float();
+              else
+                printf ("unhandled float %s  %s\n", section.c_str(), ifile.event_name().c_str());
+            }
           else
             assert (false);
         }
@@ -206,6 +214,10 @@ SpectMorph::Audio::load (GenericIn *file, AudioLoadOptions load_options)
             {
               audio_block->phases = ib;
             }
+          else if (ifile.event_name() == "env")
+            {
+              audio_block->env = ib;
+            }
           else if (ifile.event_name() == "noise")
             {
               audio_block->noise = ib;
@@ -300,6 +312,8 @@ SpectMorph::Audio::save (GenericOut *file) const
       of.write_uint16_block ("freqs", contents[i].freqs);
       of.write_uint16_block ("mags", contents[i].mags);
       of.write_uint16_block ("phases", contents[i].phases);
+      of.write_uint16_block ("env", contents[i].env);
+      of.write_float ("env_f0", contents[i].env_f0);
       of.write_float_block ("original_fft", contents[i].original_fft);
       of.write_float_block ("debug_samples", contents[i].debug_samples);
       of.end_section();
@@ -435,43 +449,11 @@ AudioBlock::sort_freqs()
 }
 
 double
-AudioBlock::estimate_fundamental (int n_partials, double *mag) const
+AudioBlock::estimate_fundamental (int n_partials) const
 {
-  g_return_val_if_fail (n_partials >= 1 && n_partials <= 3, 1.0);
-
-  double est_freq = 0, est_mag = 0;
-
-  auto update_estimate = [&] (int n, double freq_min, double freq_max)
-    {
-      if (n > n_partials)
-        return;
-
-      double best_freq = 0, best_mag = 0;
+  AudioTool::FundamentalEst f_est;
+  for (size_t p = 0; p < freqs.size(); p++)
+    f_est.add_partial (freqs_f (p), mags_f (p));
 
-      for (size_t p = 0; p < mags.size(); p++)
-        {
-          if (freqs_f (p) > freq_min && freqs_f (p) < freq_max && mags_f (p) > best_mag)
-            {
-              best_mag = mags_f (p);
-              best_freq = freqs_f (p) / n;
-            }
-        }
-      if (best_mag > 0)
-        {
-          est_mag += best_mag;
-          est_freq += best_freq * best_mag;
-        }
-    };
-
-  update_estimate (1, 0.8, 1.25);
-  update_estimate (2, 1.5, 2.5);
-  update_estimate (3, 2.5, 3.5);
-
-  if (mag)
-    *mag = est_mag;
-
-  if (est_mag > 0)
-    return est_freq / est_mag;
-  else
-    return 1;
+  return f_est.fundamental (n_partials);
 }
diff --git a/lib/smaudio.hh b/lib/smaudio.hh
@@ -31,11 +31,13 @@ public:
   std::vector<uint16_t> freqs;       //!< frequencies of the sine components of this frame
   std::vector<uint16_t> mags;        //!< magnitudes of the sine components
   std::vector<uint16_t> phases;      //!< phases of the sine components
+  std::vector<uint16_t> env;         //!< spectral envelope for formant correction
+  float                 env_f0 = 1;  //!< fundamental frequency of the spectral envelope
   std::vector<float> original_fft;   //!< original zeropadded FFT data - for debugging only
   std::vector<float> debug_samples;  //!< original audio samples for this frame - for debugging only
 
   void    sort_freqs();
-  double  estimate_fundamental (int n_partials = 1, double *mag = nullptr) const;
+  double  estimate_fundamental (int n_partials = 1) const;
 
   double
   freqs_f (size_t i) const
@@ -56,6 +58,12 @@ public:
     return phases[i] * factor;
   }
 
+  double
+  env_f (size_t i) const
+  {
+    return sm_idb2factor (env[i]);
+  }
+
   double
   noise_f (size_t i) const
   {

diff --git a/lib/smaudiotool.cc b/lib/smaudiotool.cc
@@ -72,6 +72,10 @@ AudioTool::normalize_factor (double norm, Audio& audio)
       vector<uint16_t>& noise = audio.contents[f].noise;
       for (size_t i = 0; i < noise.size(); i++)
         noise[i] = sm_bound<int> (0, noise[i] + norm_delta_idb, 65535);
+
+      vector<uint16_t>& env = audio.contents[f].env;
+      for (size_t i = 0; i < env.size(); i++)
+        env[i] = sm_bound<int> (0, env[i] + norm_delta_idb, 65535);
     }
 
   // store normalization in order to replay original samples normalized
@@ -137,18 +141,21 @@ AudioTool::get_auto_tune_factor (Audio& audio, double& tune_factor)
 }
 
 void
-AudioTool::apply_auto_tune_factor (Audio& audio, double tune_factor)
+AudioTool::apply_auto_tune_factor (AudioBlock& audio_block, double tune_factor)
 {
-  for (size_t f = 0; f < audio.contents.size(); f++)
+  for (size_t n = 0; n < audio_block.freqs.size(); n++)
     {
-      AudioBlock& block = audio.contents[f];
-
-      for (size_t n = 0; n < block.freqs.size(); n++)
-        {
-          const double freq = block.freqs_f (n) * tune_factor;
-          block.freqs[n] = sm_freq2ifreq (freq);
-        }
+      const double freq = audio_block.freqs_f (n) * tune_factor;
+      audio_block.freqs[n] = sm_freq2ifreq (freq);
     }
+  audio_block.env_f0 *= tune_factor;
+}
+
+void
+AudioTool::apply_auto_tune_factor (Audio& audio, double tune_factor)
+{
+  for (auto& audio_block : audio.contents)
+    apply_auto_tune_factor (audio_block, tune_factor);
 }
 
 void
@@ -178,12 +185,40 @@ AudioTool::auto_tune_smooth (Audio& audio, int partials, double smooth_ms, doubl
       double dest_freq = (freq_vector[f] / smooth_freq - 1) * interp + 1;
       const double tune_factor = dest_freq / freq_vector[f];
 
-      AudioBlock& block = audio.contents[f];
+      apply_auto_tune_factor (audio.contents[f], tune_factor);
+    }
+}
 
-      for (size_t p = 0; p < block.freqs.size(); p++)
+void
+AudioTool::FundamentalEst::add_partial (double freq, double mag)
+{
+  auto update_estimate = [&] (int n, double freq_min, double freq_max)
+    {
+      if (freq > freq_min && freq < freq_max && mag > m_best_mag[n])
         {
-          const double freq = block.freqs_f (p) * tune_factor;
-          block.freqs[p] = sm_freq2ifreq (freq);
+          m_best_freq[n] = freq / n;
+          m_best_mag[n] = mag;
         }
+    };
+  update_estimate (1, 0.8, 1.25);
+  update_estimate (2, 1.5, 2.5);
+  update_estimate (3, 2.5, 3.5);
+}
+
+double
+AudioTool::FundamentalEst::fundamental (int n_partials) const
+{
+  g_return_val_if_fail (n_partials >= 1 && n_partials <= 3, 1.0);
+
+  double fsum = 0, msum = 0;
+  for (int i = 1; i <= n_partials; i++)
+    {
+      fsum += m_best_freq[i] * m_best_mag[i];
+      msum += m_best_mag[i];
     }
+
+  if (msum > 0)
+    return fsum / msum;
+  else
+    return 1;
 }
diff --git a/lib/smaudiotool.hh b/lib/smaudiotool.hh
@@ -17,6 +17,7 @@ void normalize_energy (double energy, Audio& audio);
 
 bool get_auto_tune_factor (Audio& audio, double& tune_factor);
 void apply_auto_tune_factor (Audio& audio, double tune_factor);
+void apply_auto_tune_factor (AudioBlock& audio_block, double tune_factor);
 void auto_tune_smooth (Audio& audio, int partials, double smooth_ms, double smooth_percent);
 
 class Block2Energy
@@ -28,6 +29,16 @@ public:
   double energy (const AudioBlock& block);
 };
 
+class FundamentalEst
+{
+  // 1 extra element to make code more readable: best_freq[1] / best_mag[1] corresponds to partial 1
+  std::array<double, 4> m_best_freq {};
+  std::array<double, 4> m_best_mag {};
+public:
+  void   add_partial (double freq, double mag);
+  double fundamental (int n_partials) const;
+};
+
 }
 
 }

diff --git a/lib/smeffectdecoder.cc b/lib/smeffectdecoder.cc
@@ -78,6 +78,7 @@ class EffectDecoderSource : public LiveDecoderSource
   void retrigger (int channel, float freq, int midi_velocity) override;
   Audio* audio() override;
   bool rt_audio_block (size_t index, RTAudioBlock& out_block) override;
+  void set_portamento_freq (float freq) override;
 
   void set_skip (float m_skip);
   void set_source (LiveDecoderSource *source);
@@ -106,6 +107,13 @@ EffectDecoderSource::rt_audio_block (size_t index, RTAudioBlock& out_block)
   return MorphUtils::get_normalized_block (m_source, time_ms, out_block);
 }
 
+void
+EffectDecoderSource::set_portamento_freq (float freq)
+{
+  if (m_source)
+    m_source->set_portamento_freq (freq);
+}
+
 void
 EffectDecoderSource::set_skip (float skip)
 {