diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index 85b83cd6d..c41a0de65 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -9,13 +9,26 @@ git status ls -lh ls -lh node_modules +echo '-----vad+whisper----------' + +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 +rm sherpa-onnx-whisper-tiny.en.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +node ./test-vad-with-non-streaming-asr-whisper.js +rm Obama.wav +rm silero_vad.onnx +rm -rf sherpa-onnx-whisper-tiny.en + echo "----------keyword spotting----------" curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 -node ./test-keyword_spotter-transducer.js +node ./test-keyword-spotter-transducer.js rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 # offline asr diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index 005c906bf..9fd9f17a3 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -1,6 +1,9 @@ name: npm on: + push: + branches: + - npm workflow_dispatch: concurrency: @@ -89,7 +92,6 @@ jobs: path: ./*.tar.bz2 - name: Build nodejs package - if: false shell: bash env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/.github/workflows/test-nodejs.yaml b/.github/workflows/test-nodejs.yaml index 737e1df0d..25f3c38fd 100644 --- a/.github/workflows/test-nodejs.yaml +++ b/.github/workflows/test-nodejs.yaml @@ -112,7 +112,7 @@ jobs: node --version npm --version export d=scripts/nodejs - cat index.js + cat 
$d/index.js pushd $d npm install diff --git a/CHANGELOG.md b/CHANGELOG.md index 8977bae89..f78268086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## 1.10.23 + +* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268) +* Use a separate thread to initialize models for lazarus examples. (#1270) +* Object pascal examples for recording and playing audio with portaudio. (#1271) +* Text to speech API for Object Pascal. (#1273) +* update kotlin api for better release native object and add user-friendly apis. (#1275) +* Update wave-reader.cc to support 8/16/32-bit waves (#1278) +* Add WebAssembly for VAD (#1281) +* WebAssembly example for VAD + Non-streaming ASR (#1284) + ## 1.10.22 * Add Pascal API for reading wave files (#1243) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5283d43a..4b94e2351 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ project(sherpa-onnx) # ./nodejs-addon-examples # ./dart-api-examples/ # ./CHANGELOG.md -set(SHERPA_ONNX_VERSION "1.10.22") +set(SHERPA_ONNX_VERSION "1.10.23") # Disable warning about # diff --git a/dart-api-examples/add-punctuations/pubspec.yaml b/dart-api-examples/add-punctuations/pubspec.yaml index 5efb28b42..91fde0353 100644 --- a/dart-api-examples/add-punctuations/pubspec.yaml +++ b/dart-api-examples/add-punctuations/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/audio-tagging/pubspec.yaml b/dart-api-examples/audio-tagging/pubspec.yaml index de9c515c1..69235c159 100644 --- a/dart-api-examples/audio-tagging/pubspec.yaml +++ b/dart-api-examples/audio-tagging/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/keyword-spotter/pubspec.yaml b/dart-api-examples/keyword-spotter/pubspec.yaml index 93cd09173..7b78341c7 100644 --- 
a/dart-api-examples/keyword-spotter/pubspec.yaml +++ b/dart-api-examples/keyword-spotter/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 # sherpa_onnx: # path: ../../flutter/sherpa_onnx path: ^1.9.0 diff --git a/dart-api-examples/non-streaming-asr/pubspec.yaml b/dart-api-examples/non-streaming-asr/pubspec.yaml index 4ecf29778..82c359d24 100644 --- a/dart-api-examples/non-streaming-asr/pubspec.yaml +++ b/dart-api-examples/non-streaming-asr/pubspec.yaml @@ -10,7 +10,7 @@ environment: # Add regular dependencies here. dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/speaker-identification/pubspec.yaml b/dart-api-examples/speaker-identification/pubspec.yaml index 2e3c4b7ef..bd3e3e5be 100644 --- a/dart-api-examples/speaker-identification/pubspec.yaml +++ b/dart-api-examples/speaker-identification/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/streaming-asr/pubspec.yaml b/dart-api-examples/streaming-asr/pubspec.yaml index 7e0856290..b5c289eb5 100644 --- a/dart-api-examples/streaming-asr/pubspec.yaml +++ b/dart-api-examples/streaming-asr/pubspec.yaml @@ -11,7 +11,7 @@ environment: # Add regular dependencies here. dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/tts/pubspec.yaml b/dart-api-examples/tts/pubspec.yaml index da6cebafe..89c648cfd 100644 --- a/dart-api-examples/tts/pubspec.yaml +++ b/dart-api-examples/tts/pubspec.yaml @@ -8,7 +8,7 @@ environment: # Add regular dependencies here. 
dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml index 4f3e37143..a08f811e0 100644 --- a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml +++ b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml @@ -10,7 +10,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/vad/pubspec.yaml b/dart-api-examples/vad/pubspec.yaml index 9cc6186e2..c670af7da 100644 --- a/dart-api-examples/vad/pubspec.yaml +++ b/dart-api-examples/vad/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 path: ^1.9.0 args: ^2.5.0 diff --git a/flutter-examples/streaming_asr/pubspec.yaml b/flutter-examples/streaming_asr/pubspec.yaml index f3d67be19..bafe4068f 100644 --- a/flutter-examples/streaming_asr/pubspec.yaml +++ b/flutter-examples/streaming_asr/pubspec.yaml @@ -5,7 +5,7 @@ description: > publish_to: 'none' -version: 1.10.22 +version: 1.10.23 topics: - speech-recognition @@ -30,7 +30,7 @@ dependencies: record: ^5.1.0 url_launcher: ^6.2.6 - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 # sherpa_onnx: # path: ../../flutter/sherpa_onnx diff --git a/flutter-examples/tts/pubspec.yaml b/flutter-examples/tts/pubspec.yaml index 745f8faf4..f9eb64da6 100644 --- a/flutter-examples/tts/pubspec.yaml +++ b/flutter-examples/tts/pubspec.yaml @@ -5,7 +5,7 @@ description: > publish_to: 'none' # Remove this line if you wish to publish to pub.dev -version: 1.10.22 +version: 1.10.23 environment: sdk: '>=3.4.0 <4.0.0' @@ -17,7 +17,7 @@ dependencies: cupertino_icons: ^1.0.6 path_provider: ^2.1.3 path: ^1.9.0 - sherpa_onnx: ^1.10.22 + sherpa_onnx: ^1.10.23 url_launcher: ^6.2.6 audioplayers: ^5.0.0 diff --git a/flutter/sherpa_onnx/pubspec.yaml b/flutter/sherpa_onnx/pubspec.yaml index 
22765831d..106607999 100644 --- a/flutter/sherpa_onnx/pubspec.yaml +++ b/flutter/sherpa_onnx/pubspec.yaml @@ -17,7 +17,7 @@ topics: - voice-activity-detection # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec -version: 1.10.22 +version: 1.10.23 homepage: https://github.com/k2-fsa/sherpa-onnx @@ -30,23 +30,23 @@ dependencies: flutter: sdk: flutter - sherpa_onnx_android: ^1.10.22 + sherpa_onnx_android: ^1.10.23 # sherpa_onnx_android: # path: ../sherpa_onnx_android - sherpa_onnx_macos: ^1.10.22 + sherpa_onnx_macos: ^1.10.23 # sherpa_onnx_macos: # path: ../sherpa_onnx_macos - sherpa_onnx_linux: ^1.10.22 + sherpa_onnx_linux: ^1.10.23 # sherpa_onnx_linux: # path: ../sherpa_onnx_linux # - sherpa_onnx_windows: ^1.10.22 + sherpa_onnx_windows: ^1.10.23 # sherpa_onnx_windows: # path: ../sherpa_onnx_windows - sherpa_onnx_ios: ^1.10.22 + sherpa_onnx_ios: ^1.10.23 # sherpa_onnx_ios: # path: ../sherpa_onnx_ios diff --git a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec index c3e261387..bc91bc803 100644 --- a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec +++ b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec @@ -7,7 +7,7 @@ # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c Pod::Spec.new do |s| s.name = 'sherpa_onnx_ios' - s.version = '1.10.22' + s.version = '1.10.23' s.summary = 'A new Flutter FFI plugin project.' s.description = <<-DESC A new Flutter FFI plugin project. diff --git a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec index 956b5c91b..cfbabb144 100644 --- a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec +++ b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec @@ -4,7 +4,7 @@ # Pod::Spec.new do |s| s.name = 'sherpa_onnx_macos' - s.version = '1.10.22' + s.version = '1.10.23' s.summary = 'sherpa-onnx Flutter FFI plugin project.' 
s.description = <<-DESC sherpa-onnx Flutter FFI plugin project.
diff --git a/new-release.sh b/new-release.sh
new file mode 100755
index 000000000..056107fd6
--- /dev/null
+++ b/new-release.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+#
+# Bump the sherpa-onnx version string from 1.10.22 to 1.10.23 in the flutter,
+# dart-api-examples, flutter-examples and nodejs-addon-examples manifests.
+#
+# The -name patterns are quoted so that find receives them verbatim; left
+# unquoted, the shell expands them first whenever a matching file exists in
+# the current directory, and find then searches for the wrong name or fails.
+
+find flutter -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find dart-api-examples -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find flutter-examples -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find flutter -name '*.podspec' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
diff --git a/nodejs-addon-examples/package.json b/nodejs-addon-examples/package.json
index 744d08ea0..d80d25108 100644
--- a/nodejs-addon-examples/package.json
+++ b/nodejs-addon-examples/package.json
@@ -1,5 +1,5 @@
 {
   "dependencies": {
-    "sherpa-onnx-node": "^1.10.22"
+    "sherpa-onnx-node": "^1.10.23"
   }
 }
diff --git a/nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js b/nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
index 20e17db78..6f3783e7c 100644
--- a/nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
+++ b/nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
@@ -120,8 +120,8 @@ console.log('Done')
 const elapsed_seconds = (stop - start) / 1000;
 const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
-console.log('Wave duration', duration.toFixed(3), 'secodns')
-console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
 console.log(
     `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
     real_time_factor.toFixed(3))
diff --git a/nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
b/nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
new file mode 100644
index 000000000..e84c3ab11
--- /dev/null
+++ b/nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
@@ -0,0 +1,119 @@
+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+//
+// Runs silero VAD over ./Obama.wav and decodes each detected speech segment
+// with a non-streaming Whisper recognizer, printing one line per segment.
+
+const sherpa_onnx = require('sherpa-onnx');
+
+// Creates the non-streaming recognizer from the Whisper tiny.en model files.
+function createRecognizer() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+  const config = {
+    'modelConfig': {
+      'whisper': {
+        'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
+        'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
+        'tailPaddings': 2000,
+      },
+      'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
+      'debug': 0,
+    }
+  };
+
+  return sherpa_onnx.createOfflineRecognizer(config);
+}
+
+// Creates the voice activity detector from the silero VAD model file.
+function createVad() {
+  // please download silero_vad.onnx from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+  const config = {
+    sileroVad: {
+      model: './silero_vad.onnx',
+      threshold: 0.5,
+      minSpeechDuration: 0.25,
+      minSilenceDuration: 0.5,
+      windowSize: 512,
+    },
+    sampleRate: 16000,
+    debug: true,
+    numThreads: 1,
+    bufferSizeInSeconds: 60,
+  };
+
+  return sherpa_onnx.createVad(config);
+}
+
+// Decodes every segment currently queued in the VAD and prints
+// "<start> -- <end>: <text>" for each one that yields non-empty text.
+// Shared by the feeding loop and the post-flush drain so that the stream
+// created for a segment is freed on both paths.
+function decodeQueuedSegments(vad, recognizer, sampleRate) {
+  while (!vad.isEmpty()) {
+    const segment = vad.front();
+    vad.pop();
+
+    const startTime = segment.start / sampleRate;
+    const endTime = startTime + segment.samples.length / sampleRate;
+
+    const stream = recognizer.createStream();
+    try {
+      stream.acceptWaveform(sampleRate, segment.samples);
+      recognizer.decode(stream);
+
+      const r = recognizer.getResult(stream);
+      if (r.text.length > 0) {
+        const text = r.text.toLowerCase().trim();
+        console.log(
+            `${startTime.toFixed(2)} -- ${endTime.toFixed(2)}: ${text}`);
+      }
+    } finally {
+      // Release the stream explicitly, like vad and recognizer at the end.
+      stream.free();
+    }
+  }
+}
+
+const recognizer = createRecognizer();
+const vad = createVad();
+
+// please download ./Obama.wav from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const waveFilename = './Obama.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+
+const expectedSampleRate = recognizer.config.featConfig.sampleRate;
+if (wave.sampleRate !== expectedSampleRate) {
+  // Template literal, so both rates are interpolated into the message.
+  throw new Error(
+      `Expected sample rate: ${expectedSampleRate}. Given: ${wave.sampleRate}`);
+}
+
+console.log('Started');
+const start = Date.now();
+
+const windowSize = vad.config.sileroVad.windowSize;
+for (let i = 0; i < wave.samples.length; i += windowSize) {
+  vad.acceptWaveform(wave.samples.subarray(i, i + windowSize));
+  decodeQueuedSegments(vad, recognizer, wave.sampleRate);
+}
+
+// All samples have been fed; flush the VAD and decode what is still queued.
+vad.flush();
+decodeQueuedSegments(vad, recognizer, wave.sampleRate);
+
+const stop = Date.now();
+console.log('Done');
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds');
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3));
+
+vad.free();
+recognizer.free();
diff --git
a/scripts/nodejs/index.js b/scripts/nodejs/index.js index 2375afefa..3f0789edb 100644 --- a/scripts/nodejs/index.js +++ b/scripts/nodejs/index.js @@ -6,6 +6,7 @@ const sherpa_onnx_asr = require('./sherpa-onnx-asr.js'); const sherpa_onnx_tts = require('./sherpa-onnx-tts.js'); const sherpa_onnx_kws = require('./sherpa-onnx-kws.js'); const sherpa_onnx_wave = require('./sherpa-onnx-wave.js'); +const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); function createOnlineRecognizer(config) { return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); @@ -23,6 +24,14 @@ function createKws(config) { return sherpa_onnx_kws.createKws(wasmModule, config); } +function createCircularBuffer(capacity) { + return new sherpa_onnx_vad.CircularBuffer(capacity, wasmModule); +} + +function createVad(config) { + return sherpa_onnx_vad.createVad(wasmModule, config); +} + function readWave(filename) { return sherpa_onnx_wave.readWave(filename, wasmModule); } @@ -40,4 +49,6 @@ module.exports = { createKws, readWave, writeWave, + createCircularBuffer, + createVad, }; diff --git a/wasm/asr/sherpa-onnx-asr.js b/wasm/asr/sherpa-onnx-asr.js index 71848a7a9..f0b8bb778 100644 --- a/wasm/asr/sherpa-onnx-asr.js +++ b/wasm/asr/sherpa-onnx-asr.js @@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { Module.setValue(ptr + 12, buffer + offset, 'i8*'); offset += taskLen; - Module.setValue(ptr + 16, config.tailPaddings || -1, 'i32'); + Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32'); return { buffer: buffer, ptr: ptr, len: len,