Skip to content

Commit

Permalink
Fix feature extraction for NeMo CTC models
Browse files (browse the repository at this point in the history)
  • Loading branch information
csukuangfj committed Sep 27, 2024
1 parent 02c8281 commit b64f98a
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ jobs:
shell: bash
run: |
dirs=(
# sherpa-onnx-nemo-fast-conformer-ctc-en-24500
# sherpa-onnx-nemo-fast-conformer-ctc-es-1424
# sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
# sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-fast-conformer-ctc-en-24500
sherpa-onnx-nemo-fast-conformer-ctc-es-1424
sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
)
for d in ${dirs[@]}; do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ jobs:
sherpa-onnx-nemo-fast-conformer-transducer-es-1424
sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
)
for d in ${dirs[@]}; do
tar cjvf ${d}.tar.bz2 ./$d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ mv -v *.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

if false; then
# 8500 hours of English speech
url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_pc
name=$(basename $url)
Expand Down Expand Up @@ -72,7 +71,6 @@ mkdir -p $d
mv -v *.onnx $d/
mv -v tokens.txt $d/
ls -lh $d
fi

# Now test the exported model
log "Download test data"
Expand All @@ -97,14 +95,12 @@ python3 ./test-onnx-transducer-non-streaming.py \
--decoder $d/decoder.onnx \
--joiner $d/joiner.onnx \
--tokens $d/tokens.txt \
--wav $data/en.wav
--wav ./en.wav

mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

exit 0

d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500
python3 ./test-onnx-transducer-non-streaming.py \
--encoder $d/encoder.onnx \
Expand Down
8 changes: 8 additions & 0 deletions sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
config_.feat_config.is_mfcc = true;
}

if (!config_.model_config.nemo_ctc.model.empty()) {
config_.feat_config.low_freq = 0;
config_.feat_config.high_freq = 0;
config_.feat_config.is_librosa = true;
config_.feat_config.remove_dc_offset = false;
config_.feat_config.window_type = "hann";
}

if (!config_.model_config.wenet_ctc.model.empty()) {
// WeNet CTC models assume input samples are in the range
// [-32768, 32767], so we set normalize_samples to false
Expand Down

0 comments on commit b64f98a

Please sign in to comment.