Skip to content

Commit

Permalink
Merge branch 'spectral-envelope'
Browse files Browse the repository at this point in the history
* spectral-envelope:
  LIB: LiveDecoderSource: make set_portamento_freq pure virtual
	Every source should implement it.
  LIB: MorphSourceModule: ignore set_portamento_freq() for now
  JACK: ignore set_portamento_freq for live decoder source
  TESTS: ignore set_portamento_freq for live decoder sources
  LIB: EffectDecoderSource: support set_portamento_freq
  LIB: MorphGridModule: whitespace fix
  TODO+++
  LIB: Encoder: use fundamental frequency estimation from AudioTool
  LIB: AudioBlock: use fundamental frequency estimation from AudioTool
  LIB: AudioTool: add fundamental frequency estimation algorithm
	This used to be part of AudioBlock, but having it available in AudioTool
	makes it possible to estimate a fundamental frequency without AudioBlock.
  LIB: MorphWavSource: remove unused property names
  LIB: VoiceSource: use const fuzzy freq for each frac [0..1] range
  LIB: VoiceSource: randomize start value of fuzzy frac to range [0..1]
  LIB: Encoder: merge window_weight computations of different steps
  LIB: WavData: remove obsolete FIXME
  LIB: VoiceSource: optimize detune factor generation
  LIB: VoiceSource: set maximum number of resynthesis partials to 1000
  LIB: VoiceSource: generate detune factors as needed (performance)
  LIB: VoiceSource: compute number of resynth partials before loop
  LIB: VoiceSource: generate detune factors on demand (performance)
  LIB: VoiceSource: avoid generating inaudible partials (performance)
  LIB: VoiceSource: optimize magnitude normalization step
  LIB: WavSource: remove formants/spectral mode
  LIB: VoiceSource: avoid allocations in process_block()
  LIB: VoiceSource: remove FFT related member variables
  LIB: WavSource: Resynthesis: use required number of partials (performance)
  LIB: MorphWavSourceModule: remove debugging output
  LIB: WavSource: hard code fuzzy resynth freq and max fuzzy resynth
  GLUI: MorphWavSourceView: only show fuzzy resynth in formant resynth mode
  LIB: WavSource: use percent parameter for fuzzy resynth
  TESTS: testmidisynth: support pitch expression in scripts
  LIB: WavSetBuilder: adapt block fundamental during auto tune
  LIB: AudioTool: update spectral envelope parameters properly
  GLUI: MorphWavSourceView: add UI for formant correction
  LIB: MorphWavSourceModule: implement formant correction
  LIB: MorphWavSource: add config for formant correction
  LIB: MorphLinearModule: implement set_portamento_freq()
  LIB: MorphGridModule: implement set_portamento_freq
  LIB: LiveDecoder: provide portamento frequency to source
  LIB: Encoder: estimate block spectral envelope
  LIB: AudioBlock: add fields for spectral envelope and block f0

Signed-off-by: Stefan Westerfeld <[email protected]>
  • Loading branch information
swesterfeld committed Apr 10, 2024
2 parents 37863e9 + 5dc7bd1 commit 7285766
Show file tree
Hide file tree
Showing 28 changed files with 495 additions and 79 deletions.
7 changes: 7 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
NEWER TODOS:
VoiceSource:
- better names for WavSource formant correction
- better names inside VoiceSource; rename VoiceSource
- [maybe] support formant correction for Source
- experiments for handling freq < 1 in spectral envelope
- experiments for start phase randomization
Other:
- portamento should also affect filter cutoff (key tracking)
- 1-instrument-wav-source contains the data - not a good idea!
- LV2 archive storage doesn't seem to work
Expand Down
9 changes: 9 additions & 0 deletions glui/smmorphwavsourceview.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ MorphWavSourceView::MorphWavSourceView (Widget *parent, MorphWavSource *morph_wa
// POSITION
pv_position = add_property_view (MorphWavSource::P_POSITION, op_layout);

// FORMANT CORRECT
auto pv_formant_correct = add_property_view (MorphWavSource::P_FORMANT_CORRECT, op_layout);
prop_formant_correct = pv_formant_correct->property();
connect (prop_formant_correct->signal_value_changed, this, &MorphWavSourceView::update_visible);
pv_fuzzy_resynth = add_property_view (MorphWavSource::P_FUZZY_RESYNTH, op_layout);

update_visible();

instrument_label->set_x (0);
Expand Down Expand Up @@ -272,6 +278,9 @@ MorphWavSourceView::update_visible()
bool custom_position = (prop_play_mode->get() == MorphWavSource::PLAY_MODE_CUSTOM_POSITION);
pv_position->set_visible (custom_position);

bool resynth = (prop_formant_correct->get() == MorphWavSource::FORMANT_RESYNTH);
pv_fuzzy_resynth->set_visible (resynth);

op_layout.activate();
signal_size_changed();
}
6 changes: 4 additions & 2 deletions glui/smmorphwavsourceview.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ class MorphWavSourceView : public MorphOperatorView
UserInstrumentIndex *user_instrument_index = nullptr;
std::unique_ptr<Instrument> edit_instrument; // temporary copy used for editing

Property *prop_play_mode;
PropertyView *pv_position;
Property *prop_play_mode = nullptr;
PropertyView *pv_position = nullptr;
Property *prop_formant_correct = nullptr;
PropertyView *pv_fuzzy_resynth = nullptr;
OperatorLayout op_layout;

void on_edit();
Expand Down
7 changes: 7 additions & 0 deletions jack/smsimplejackplayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Source : public LiveDecoderSource
void retrigger (int, float, int) override;
Audio* audio() override;
bool rt_audio_block (size_t index, RTAudioBlock& out_block) override;
void set_portamento_freq (float freq) override;
};

Source::Source (Audio *audio) :
Expand Down Expand Up @@ -52,6 +53,12 @@ Source::rt_audio_block (size_t index, RTAudioBlock& out_block)
}
}

// Override of the set_portamento_freq() hook declared in this Source class.
// This player's source does not react to portamento, so the frequency passed
// in is deliberately discarded.
void
Source::set_portamento_freq (float freq)
{
// intentionally empty: freq is ignored
}

}

static int
Expand Down
56 changes: 19 additions & 37 deletions lib/smaudio.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "smmemout.hh"
#include "smmmapin.hh"
#include "smwavsetrepo.hh"
#include "smaudiotool.hh"
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
Expand Down Expand Up @@ -144,6 +145,13 @@ SpectMorph::Audio::load (GenericIn *file, AudioLoadOptions load_options)
else
printf ("unhandled float %s %s\n", section.c_str(), ifile.event_name().c_str());
}
else if (section == "frame")
{
if (ifile.event_name() == "env_f0")
audio_block->env_f0 = ifile.event_float();
else
printf ("unhandled float %s %s\n", section.c_str(), ifile.event_name().c_str());
}
else
assert (false);
}
Expand Down Expand Up @@ -206,6 +214,10 @@ SpectMorph::Audio::load (GenericIn *file, AudioLoadOptions load_options)
{
audio_block->phases = ib;
}
else if (ifile.event_name() == "env")
{
audio_block->env = ib;
}
else if (ifile.event_name() == "noise")
{
audio_block->noise = ib;
Expand Down Expand Up @@ -300,6 +312,8 @@ SpectMorph::Audio::save (GenericOut *file) const
of.write_uint16_block ("freqs", contents[i].freqs);
of.write_uint16_block ("mags", contents[i].mags);
of.write_uint16_block ("phases", contents[i].phases);
of.write_uint16_block ("env", contents[i].env);
of.write_float ("env_f0", contents[i].env_f0);
of.write_float_block ("original_fft", contents[i].original_fft);
of.write_float_block ("debug_samples", contents[i].debug_samples);
of.end_section();
Expand Down Expand Up @@ -435,43 +449,11 @@ AudioBlock::sort_freqs()
}

double
AudioBlock::estimate_fundamental (int n_partials, double *mag) const
AudioBlock::estimate_fundamental (int n_partials) const
{
g_return_val_if_fail (n_partials >= 1 && n_partials <= 3, 1.0);

double est_freq = 0, est_mag = 0;

auto update_estimate = [&] (int n, double freq_min, double freq_max)
{
if (n > n_partials)
return;

double best_freq = 0, best_mag = 0;
AudioTool::FundamentalEst f_est;
for (size_t p = 0; p < freqs.size(); p++)
f_est.add_partial (freqs_f (p), mags_f (p));

for (size_t p = 0; p < mags.size(); p++)
{
if (freqs_f (p) > freq_min && freqs_f (p) < freq_max && mags_f (p) > best_mag)
{
best_mag = mags_f (p);
best_freq = freqs_f (p) / n;
}
}
if (best_mag > 0)
{
est_mag += best_mag;
est_freq += best_freq * best_mag;
}
};

update_estimate (1, 0.8, 1.25);
update_estimate (2, 1.5, 2.5);
update_estimate (3, 2.5, 3.5);

if (mag)
*mag = est_mag;

if (est_mag > 0)
return est_freq / est_mag;
else
return 1;
return f_est.fundamental (n_partials);
}
10 changes: 9 additions & 1 deletion lib/smaudio.hh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ public:
std::vector<uint16_t> freqs; //!< frequencies of the sine components of this frame
std::vector<uint16_t> mags; //!< magnitudes of the sine components
std::vector<uint16_t> phases; //!< phases of the sine components
std::vector<uint16_t> env; //!< spectral envelope for formant correction
float env_f0 = 1; //!< fundamental frequency of the spectral envelope
std::vector<float> original_fft; //!< original zeropadded FFT data - for debugging only
std::vector<float> debug_samples; //!< original audio samples for this frame - for debugging only

void sort_freqs();
double estimate_fundamental (int n_partials = 1, double *mag = nullptr) const;
double estimate_fundamental (int n_partials = 1) const;

double
freqs_f (size_t i) const
Expand All @@ -56,6 +58,12 @@ public:
return phases[i] * factor;
}

/**
 * Returns the spectral envelope entry at index \a i, converted from its
 * stored uint16_t representation via sm_idb2factor().
 *
 * No bounds checking is done here; \a i must be a valid index into env.
 */
double
env_f (size_t i) const
{
  const uint16_t env_idb = env[i];

  return sm_idb2factor (env_idb);
}

double
noise_f (size_t i) const
{
Expand Down
61 changes: 48 additions & 13 deletions lib/smaudiotool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ AudioTool::normalize_factor (double norm, Audio& audio)
vector<uint16_t>& noise = audio.contents[f].noise;
for (size_t i = 0; i < noise.size(); i++)
noise[i] = sm_bound<int> (0, noise[i] + norm_delta_idb, 65535);

vector<uint16_t>& env = audio.contents[f].env;
for (size_t i = 0; i < env.size(); i++)
env[i] = sm_bound<int> (0, env[i] + norm_delta_idb, 65535);
}

// store normalization in order to replay original samples normalized
Expand Down Expand Up @@ -137,18 +141,21 @@ AudioTool::get_auto_tune_factor (Audio& audio, double& tune_factor)
}

void
AudioTool::apply_auto_tune_factor (Audio& audio, double tune_factor)
AudioTool::apply_auto_tune_factor (AudioBlock& audio_block, double tune_factor)
{
for (size_t f = 0; f < audio.contents.size(); f++)
for (size_t n = 0; n < audio_block.freqs.size(); n++)
{
AudioBlock& block = audio.contents[f];

for (size_t n = 0; n < block.freqs.size(); n++)
{
const double freq = block.freqs_f (n) * tune_factor;
block.freqs[n] = sm_freq2ifreq (freq);
}
const double freq = audio_block.freqs_f (n) * tune_factor;
audio_block.freqs[n] = sm_freq2ifreq (freq);
}
audio_block.env_f0 *= tune_factor;
}

// Applies tune_factor to every block stored in audio.contents by delegating
// to the per-block overload of apply_auto_tune_factor().
void
AudioTool::apply_auto_tune_factor (Audio& audio, double tune_factor)
{
  for (AudioBlock& block : audio.contents)
    {
      apply_auto_tune_factor (block, tune_factor);
    }
}

void
Expand Down Expand Up @@ -178,12 +185,40 @@ AudioTool::auto_tune_smooth (Audio& audio, int partials, double smooth_ms, doubl
double dest_freq = (freq_vector[f] / smooth_freq - 1) * interp + 1;
const double tune_factor = dest_freq / freq_vector[f];

AudioBlock& block = audio.contents[f];
apply_auto_tune_factor (audio.contents[f], tune_factor);
}
}

for (size_t p = 0; p < block.freqs.size(); p++)
void
AudioTool::FundamentalEst::add_partial (double freq, double mag)
{
auto update_estimate = [&] (int n, double freq_min, double freq_max)
{
if (freq > freq_min && freq < freq_max && mag > m_best_mag[n])
{
const double freq = block.freqs_f (p) * tune_factor;
block.freqs[p] = sm_freq2ifreq (freq);
m_best_freq[n] = freq / n;
m_best_mag[n] = mag;
}
};
update_estimate (1, 0.8, 1.25);
update_estimate (2, 1.5, 2.5);
update_estimate (3, 2.5, 3.5);
}

// Computes the fundamental estimate from the partials collected so far.
//
// The result is the magnitude-weighted average of the per-partial estimates
// stored in m_best_freq[1..n_partials], using m_best_mag[1..n_partials] as
// weights.
//
// n_partials: number of partials to take into account; must be in [1, 3],
//             otherwise g_return_val_if_fail() returns 1.0.
//
// Returns 1 if the accumulated magnitude is not positive (for instance if no
// suitable partial has been added).
double
AudioTool::FundamentalEst::fundamental (int n_partials) const
{
  g_return_val_if_fail (n_partials >= 1 && n_partials <= 3, 1.0);

  double weighted_freq_sum = 0;
  double mag_sum = 0;

  // slot 0 of both arrays is unused, so partial numbers start at 1
  for (int partial = 1; partial <= n_partials; partial++)
    {
      const double partial_mag = m_best_mag[partial];

      weighted_freq_sum += m_best_freq[partial] * partial_mag;
      mag_sum += partial_mag;
    }

  return (mag_sum > 0) ? weighted_freq_sum / mag_sum : 1;
}
11 changes: 11 additions & 0 deletions lib/smaudiotool.hh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ void normalize_energy (double energy, Audio& audio);

bool get_auto_tune_factor (Audio& audio, double& tune_factor);
void apply_auto_tune_factor (Audio& audio, double tune_factor);
void apply_auto_tune_factor (AudioBlock& audio_block, double tune_factor);
void auto_tune_smooth (Audio& audio, int partials, double smooth_ms, double smooth_percent);

class Block2Energy
Expand All @@ -28,6 +29,16 @@ public:
double energy (const AudioBlock& block);
};

// Fundamental frequency estimator: partials are fed in one at a time with
// add_partial(), and fundamental() then combines what was recorded for the
// first n_partials partials into a single estimate.
class FundamentalEst
{
public:
  // Offer one partial (frequency and magnitude) to the estimator.
  void   add_partial (double freq, double mag);

  // Estimate based on the first n_partials partials recorded so far.
  double fundamental (int n_partials) const;

private:
  // Both arrays hold one extra element so that index n refers directly to
  // partial n (index 0 is never used); everything starts out zeroed.
  std::array<double, 4> m_best_freq {};
  std::array<double, 4> m_best_mag {};
};

}

}
Expand Down
8 changes: 8 additions & 0 deletions lib/smeffectdecoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class EffectDecoderSource : public LiveDecoderSource
void retrigger (int channel, float freq, int midi_velocity) override;
Audio* audio() override;
bool rt_audio_block (size_t index, RTAudioBlock& out_block) override;
void set_portamento_freq (float freq) override;

void set_skip (float m_skip);
void set_source (LiveDecoderSource *source);
Expand Down Expand Up @@ -106,6 +107,13 @@ EffectDecoderSource::rt_audio_block (size_t index, RTAudioBlock& out_block)
return MorphUtils::get_normalized_block (m_source, time_ms, out_block);
}

// Passes the portamento frequency on to the source held in m_source.
// Does nothing while no source is set.
void
EffectDecoderSource::set_portamento_freq (float freq)
{
  if (!m_source)
    return; // no source to forward to

  m_source->set_portamento_freq (freq);
}

void
EffectDecoderSource::set_skip (float skip)
{
Expand Down
Loading

0 comments on commit 7285766

Please sign in to comment.