Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SpeechGenerator for incremental synthesis #38

Merged
merged 6 commits into from
May 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions benches/bonsais.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fn bonsai(bencher: &mut Bencher) {
let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();

bencher.iter(|| {
engine.synthesize_from_strings(&lines).unwrap();
engine.synthesize(&lines).unwrap();
});
}

Expand Down Expand Up @@ -63,7 +63,7 @@ fn is_bonsai(bencher: &mut Bencher) {
let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();

bencher.iter(|| {
engine.synthesize_from_strings(&lines).unwrap();
engine.synthesize(&lines).unwrap();
});
}

Expand Down Expand Up @@ -135,6 +135,6 @@ fn bonsai_letter(bencher: &mut Bencher) {
let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();

bencher.iter(|| {
engine.synthesize_from_strings(&lines).unwrap();
engine.synthesize(&lines).unwrap();
});
}
2 changes: 1 addition & 1 deletion examples/genji/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
iw.set_parameter(1, &[0.5, 0.5])?;
iw.set_parameter(2, &[1.0, 0.0])?;

let speech = engine.synthesize_from_strings(&lines)?;
let speech = engine.synthesize(&*lines)?;

println!(
"The synthesized voice has {} samples in total.",
Expand Down
2 changes: 1 addition & 1 deletion examples/is-bonsai/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let engine = Engine::load(&[
"models/hts_voice_nitech_jp_atr503_m001-1.05/nitech_jp_atr503_m001.htsvoice",
])?;
let speech = engine.synthesize_from_strings(&lines)?;
let speech = engine.synthesize(&lines)?;

println!(
"The synthesized voice has {} samples in total.",
Expand Down
34 changes: 12 additions & 22 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;

use crate::constants::DB;
use crate::duration::DurationEstimator;
use crate::label::{LabelError, Labels};
use crate::label::{LabelError, ToLabels};
use crate::mlpg_adjust::MlpgAdjust;
use crate::model::interporation_weight::InterporationWeight;
use crate::model::{ModelError, Models, VoiceSet};
Expand Down Expand Up @@ -247,27 +247,12 @@ impl Engine {
Engine { voices, condition }
}

pub fn synthesize_from_strings<S: AsRef<str>>(
&self,
lines: &[S],
) -> Result<Vec<f64>, EngineError> {
let labels = Labels::load_from_strings(
self.condition.sampling_frequency,
self.condition.fperiod,
lines,
)?;
Ok(self.generate_speech(&labels))
/// Synthesizes the complete waveform for `labels` in a single call.
///
/// A speech generator is built through `generator` and then drained with
/// `synthesize_all`, so the returned vector holds every sample of the
/// utterance.
///
/// # Errors
///
/// Returns the error reported by `generator` unchanged, e.g. when `labels`
/// cannot be converted.
pub fn synthesize(&self, labels: impl ToLabels) -> Result<Vec<f64>, EngineError> {
    let generator = self.generator(labels)?;
    Ok(generator.synthesize_all())
}

pub fn synthesize_from_labels(
&self,
labels: Vec<jlabel::Label>,
) -> Result<Vec<f64>, EngineError> {
let labels = Labels::new(labels, None)?;
Ok(self.generate_speech(&labels))
}

pub fn generate_speech(&self, labels: &Labels) -> Vec<f64> {
pub fn generator(&self, labels: impl ToLabels) -> Result<SpeechGenerator, EngineError> {
let labels = labels.to_labels(&self.condition)?;
let vocoder = Vocoder::new(
self.voices.stream_metadata(0).vector_length,
self.voices.stream_metadata(2).vector_length,
Expand Down Expand Up @@ -324,7 +309,12 @@ impl Engine {
vec![vec![0.0; 0]; lf0.len()]
};

let generator = SpeechGenerator::new(self.condition.fperiod);
generator.synthesize(vocoder, spectrum, lf0, lpf)
Ok(SpeechGenerator::new(
self.condition.fperiod,
vocoder,
spectrum,
lf0,
lpf,
))
}
}
34 changes: 34 additions & 0 deletions src/label.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::engine::Condition;

#[derive(Debug, thiserror::Error)]
pub enum LabelError {
#[error("jlabel failed to parse fullcontext-label: {0}")]
Expand Down Expand Up @@ -106,6 +108,38 @@
}
}

/// Conversion of an input value into `Labels`.
///
/// Implementations in this file cover already-parsed `Vec<jlabel::Label>`
/// values as well as string inputs (`&[S]`, `&[S; N]` and `Vec<String>`).
pub trait ToLabels {
/// Consumes `self` and builds the `Labels`.
///
/// `condition` is handed to every implementation; the string-based ones read
/// the sampling frequency and frame period from it.
///
/// # Errors
///
/// Returns a `LabelError` when the labels cannot be constructed.
fn to_labels(self, condition: &Condition) -> Result<Labels, LabelError>;
}

/// Already-parsed labels are handed straight to `Labels::new`; the engine
/// condition is not consulted (hence `_condition`).
impl ToLabels for Vec<jlabel::Label> {
fn to_labels(self, _condition: &Condition) -> Result<Labels, LabelError> {
// The second argument is left as `None`.
// NOTE(review): presumably optional per-label timing data; confirm against `Labels::new`.
Labels::new(self, None)
}
}

impl<S: AsRef<str>> ToLabels for &[S] {
fn to_labels(self, condition: &Condition) -> Result<Labels, LabelError> {
Labels::load_from_strings(
condition.get_sampling_frequency(),
condition.get_fperiod(),
self,

Check warning on line 126 in src/label.rs

View check run for this annotation

Codecov / codecov/patch

src/label.rs#L126

Added line #L126 was not covered by tests
)
}
}

/// Lets a reference to a fixed-size array of string-like lines be used as
/// labels by viewing it as a slice and delegating to the `&[S]` implementation.
impl<const N: usize, S: AsRef<str>> ToLabels for &[S; N] {
    fn to_labels(self, condition: &Condition) -> Result<Labels, LabelError> {
        // `&[S; N]` coerces to `&[S]`, which selects the slice implementation.
        let lines: &[S] = self;
        lines.to_labels(condition)
    }
}

/// Lets an owned vector of label lines be used as labels by borrowing it as a
/// slice and delegating to the `&[S]` implementation.
// NOTE(review): Codecov (codecov/patch) reported lines 138-139 of src/label.rs,
// inside this impl, as not covered by tests.
impl ToLabels for Vec<String> {
    fn to_labels(self, condition: &Condition) -> Result<Labels, LabelError> {
        let lines: &[String] = &self;
        lines.to_labels(condition)
    }
}

#[cfg(test)]
mod tests {
use super::Labels;
Expand Down
15 changes: 8 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ mod tests {
fn bonsai() {
let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();

let speech = engine.synthesize_from_strings(&SAMPLE_SENTENCE_1).unwrap();
let speech = engine.synthesize(&SAMPLE_SENTENCE_1).unwrap();

assert_eq!(speech.len(), 66480);
approx::assert_abs_diff_eq!(speech[2000], 19.35141137623778, epsilon = 1.0e-10);
Expand All @@ -42,14 +42,14 @@ mod tests {

#[test]
fn bonsai_from_labels() {
let labels = SAMPLE_SENTENCE_1
let labels: Vec<jlabel::Label> = SAMPLE_SENTENCE_1
.iter()
.map(|l| l.parse().unwrap())
.collect();

let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();

let speech = engine.synthesize_from_labels(labels).unwrap();
let speech = engine.synthesize(labels).unwrap();

assert_eq!(speech.len(), 66480);
approx::assert_abs_diff_eq!(speech[2000], 19.35141137623778, epsilon = 1.0e-10);
Expand All @@ -65,7 +65,7 @@ mod tests {
iw.set_parameter(1, &[0.7, 0.3]).unwrap();
iw.set_parameter(2, &[1.0, 0.0]).unwrap();

let speech = engine.synthesize_from_strings(&SAMPLE_SENTENCE_1).unwrap();
let speech = engine.synthesize(&SAMPLE_SENTENCE_1).unwrap();

assert_eq!(speech.len(), 74880);
approx::assert_abs_diff_eq!(speech[2000], 2.3158134981607754e-5, epsilon = 1.0e-10);
Expand Down Expand Up @@ -105,7 +105,7 @@ mod tests {
fn is_this_bonsai() {
let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();

let speech = engine.synthesize_from_strings(&SAMPLE_SENTENCE_2).unwrap();
let speech = engine.synthesize(&SAMPLE_SENTENCE_2).unwrap();

assert_eq!(speech.len(), 100800);
approx::assert_abs_diff_eq!(speech[2000], 17.15977345625943, epsilon = 1.0e-10);
Expand All @@ -119,7 +119,7 @@ mod tests {
let mut engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
engine.condition.set_speed(1.4);

let speech = engine.synthesize_from_strings(&SAMPLE_SENTENCE_2).unwrap();
let speech = engine.synthesize(&SAMPLE_SENTENCE_2).unwrap();

assert_eq!(speech.len(), 72000);
approx::assert_abs_diff_eq!(speech[2000], 15.0481014871396, epsilon = 1.0e-10);
Expand All @@ -131,7 +131,8 @@ mod tests {
#[test]
fn empty() {
let engine = Engine::load(&[MODEL_NITECH_ATR503]).unwrap();
let speech = engine.synthesize_from_strings::<String>(&[]).unwrap();
let labels: [&str; 0] = [];
let speech = engine.synthesize(&labels[..]).unwrap();
assert_eq!(speech.len(), 0);
}
}
85 changes: 56 additions & 29 deletions src/speech.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,71 @@

/// Incremental synthesizer: each `synthesize` call renders one frame of
/// samples and advances an internal frame cursor.
pub struct SpeechGenerator {
/// Number of samples reported per synthesized frame.
fperiod: usize,
/// Vocoder that renders each frame into the caller's sample buffer.
vocoder: Vocoder,
/// Per-frame spectrum parameters, indexed by frame number.
spectrum: Parameter,
/// Per-frame lf0 parameters; element `0` of each frame is fed to the vocoder.
lf0: Parameter,
/// Per-frame low-pass filter parameters, indexed by frame number.
lpf: Parameter,

/// Index of the next frame to synthesize (equals the number already rendered).
next: usize,
}

impl SpeechGenerator {
pub fn new(fperiod: usize) -> Self {
Self { fperiod }
}
/// Generate speech
pub fn synthesize(
&self,
mut v: Vocoder,
pub fn new(
fperiod: usize,
vocoder: Vocoder,
spectrum: Parameter,
lf0: Parameter,
lpf: Parameter,
) -> Vec<f64> {
// check
if !lf0.is_empty() {
if lf0[0].len() != 1 {
panic!("The size of lf0 static vector must be 1.");
}
if lpf[0].len() % 2 == 0 {
panic!("The number of low-pass filter coefficient must be odd numbers.");
}
) -> Self {
if !lf0.is_empty() && lf0[0].len() != 1 {
panic!("The size of lf0 static vector must be 1.");

Check warning on line 24 in src/speech.rs

View check run for this annotation

Codecov / codecov/patch

src/speech.rs#L24

Added line #L24 was not covered by tests
}
if !lpf.is_empty() && lpf[0].len() % 2 == 0 {
panic!("The number of low-pass filter coefficient must be odd numbers.");

Check warning on line 27 in src/speech.rs

View check run for this annotation

Codecov / codecov/patch

src/speech.rs#L27

Added line #L27 was not covered by tests
}

// create speech buffer
let total_frame = lf0.len();
let mut speech = vec![0.0; total_frame * self.fperiod];

// synthesize speech waveform
for i in 0..total_frame {
v.synthesize(
lf0[i][0],
&spectrum[i],
&lpf[i],
&mut speech[i * self.fperiod..(i + 1) * self.fperiod],
);
Self {
fperiod,
vocoder,
spectrum,
lf0,
lpf,
next: 0,
}
}

pub fn synthesized_frames(&self) -> usize {
self.next

Check warning on line 41 in src/speech.rs

View check run for this annotation

Codecov / codecov/patch

src/speech.rs#L40-L41

Added lines #L40 - L41 were not covered by tests
}

/// Generate speech
pub fn synthesize(&mut self, speech: &mut [f64]) -> usize {
if self.lf0.len() <= self.next {
return 0;
}
if speech.len() < self.fperiod {
panic!("The length of speech buffer must be larger than fperiod.");

Check warning on line 50 in src/speech.rs

View check run for this annotation

Codecov / codecov/patch

src/speech.rs#L50

Added line #L50 was not covered by tests
}

self.vocoder.synthesize(
self.lf0[self.next][0],
&self.spectrum[self.next],
&self.lpf[self.next],
speech,
);
self.next += 1;

self.fperiod
}

pub fn synthesize_all(mut self) -> Vec<f64> {
if self.next != 0 {
eprintln!("The speech generator has already synthesized some frames.");

Check warning on line 66 in src/speech.rs

View check run for this annotation

Codecov / codecov/patch

src/speech.rs#L66

Added line #L66 was not covered by tests
}

let mut buf = vec![0.0; (self.lf0.len() - self.next) * self.fperiod];
while self.synthesize(&mut buf[self.next * self.fperiod..]) > 0 {}

speech
buf
}
}
Loading