Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VAD and keyword spotting for the Node package with WebAssembly #1286

Merged
merged 5 commits into from
Aug 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,28 @@ git status
ls -lh
ls -lh node_modules

echo '-----vad+whisper----------'

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-with-non-streaming-asr-whisper.js
rm Obama.wav
rm silero_vad.onnx
rm -rf sherpa-onnx-whisper-tiny.en

echo "----------keyword spotting----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

node ./test-keyword-spotter-transducer.js
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

# offline asr
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
Expand Down
31 changes: 29 additions & 2 deletions .github/workflows/npm.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: npm

on:
push:
branches:
- npm
workflow_dispatch:

concurrency:
Expand All @@ -27,6 +30,9 @@ jobs:

- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
version: 3.1.51
actions-cache-folder: 'emsdk-cache'

- name: View emsdk version
shell: bash
Expand All @@ -51,8 +57,6 @@ jobs:

- name: Build nodejs package
shell: bash
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
./build-wasm-simd-nodejs.sh
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
Expand All @@ -71,6 +75,29 @@ jobs:

rm package.json.bak

- name: Collect files
shell: bash
run: |
dst=sherpa-onnx-wasm-nodejs
mkdir $dst
cp -v scripts/nodejs/* $dst
tar cvjf $dst.tar.bz2 $dst

echo "---"
ls -h $dst

- uses: actions/upload-artifact@v4
with:
name: sherpa-onnx-wasm-nodejs
path: ./*.tar.bz2

- name: Build nodejs package
shell: bash
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
cd scripts/nodejs

git diff

npm install
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/test-nodejs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ jobs:

- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
version: 3.1.51
actions-cache-folder: 'emsdk-cache'

- name: View emsdk version
shell: bash
Expand Down Expand Up @@ -109,6 +112,7 @@ jobs:
node --version
npm --version
export d=scripts/nodejs
cat $d/index.js

pushd $d
npm install
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## 1.10.23

* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
* Use a separate thread to initialize models for lazarus examples. (#1270)
* Object pascal examples for recording and playing audio with portaudio. (#1271)
* Text to speech API for Object Pascal. (#1273)
* Update the Kotlin API to better release native objects and add user-friendly APIs. (#1275)
* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
* Add WebAssembly for VAD (#1281)
* WebAssembly example for VAD + Non-streaming ASR (#1284)

## 1.10.22

* Add Pascal API for reading wave files (#1243)
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ project(sherpa-onnx)
# ./nodejs-addon-examples
# ./dart-api-examples/
# ./CHANGELOG.md
set(SHERPA_ONNX_VERSION "1.10.22")
set(SHERPA_ONNX_VERSION "1.10.23")

# Disable warning about
#
Expand Down Expand Up @@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
if(NOT SHERPA_ONNX_ENABLE_WASM)
message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
endif()
add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
endif()

if(SHERPA_ONNX_ENABLE_WASM)
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/add-punctuations/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/audio-tagging/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/keyword-spotter/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/speaker-identification/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/streaming_asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >

publish_to: 'none'

version: 1.10.22
version: 1.10.23

topics:
- speech-recognition
Expand All @@ -30,7 +30,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6

sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >

publish_to: 'none' # Remove this line if you wish to publish to pub.dev

version: 1.10.22
version: 1.10.23

environment:
sdk: '>=3.4.0 <4.0.0'
Expand All @@ -17,7 +17,7 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
url_launcher: ^6.2.6
audioplayers: ^5.0.0

Expand Down
12 changes: 6 additions & 6 deletions flutter/sherpa_onnx/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.10.22
version: 1.10.23

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand All @@ -30,23 +30,23 @@ dependencies:
flutter:
sdk: flutter

sherpa_onnx_android: ^1.10.22
sherpa_onnx_android: ^1.10.23
# sherpa_onnx_android:
# path: ../sherpa_onnx_android

sherpa_onnx_macos: ^1.10.22
sherpa_onnx_macos: ^1.10.23
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos

sherpa_onnx_linux: ^1.10.22
sherpa_onnx_linux: ^1.10.23
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows: ^1.10.22
sherpa_onnx_windows: ^1.10.23
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows

sherpa_onnx_ios: ^1.10.22
sherpa_onnx_ios: ^1.10.23
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios

Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
s.version = '1.10.22'
s.version = '1.10.23'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
s.version = '1.10.22'
s.version = '1.10.23'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
Expand Down
7 changes: 7 additions & 0 deletions new-release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
#
# Bump the sherpa-onnx version string from 1.10.22 to 1.10.23 in the
# Flutter packages, the Dart/Flutter examples, the podspecs and the
# Node.js addon examples.
#
# Notes:
#  - The -name patterns are quoted so that the shell passes them to find
#    verbatim. Unquoted, a pattern such as *.yaml is expanded by the shell
#    whenever a matching file exists in the current directory, which makes
#    find match the wrong names or fail outright.
#  - sed -i.bak leaves a "<file>.bak" backup next to every file it edits.

find flutter -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find dart-api-examples -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find flutter-examples -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find flutter -name '*.podspec' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
2 changes: 1 addition & 1 deletion nodejs-addon-examples/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"dependencies": {
"sherpa-onnx-node": "^1.10.22"
"sherpa-onnx-node": "^1.10.23"
}
}
2 changes: 1 addition & 1 deletion nodejs-addon-examples/test_keyword_spotter_transducer.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

const detectedKeywords = [];
while (kws.isReady(stream)) {
kws.decode(stream);
const keyword = kws.getResult(stream).keyword;
if (keyword != '') {
detectedKeywords.push(keyword);
}
kws.decode(stream);
}
let stop = Date.now();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ console.log('Done')
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'secodns')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
real_time_factor.toFixed(3))
49 changes: 49 additions & 0 deletions nodejs-examples/test-keyword-spotter-transducer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx');

/**
 * Create a keyword spotter from the zipformer wenetspeech transducer model.
 *
 * Please download test files from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
 *
 * @returns whatever `sherpa_onnx.createKws` returns for the config built here.
 */
function createKeywordSpotter() {
  const modelDir = './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01';
  const modelSuffix = 'epoch-12-avg-2-chunk-16-left-64.onnx';

  // The three transducer networks share the same file-name suffix.
  const transducer = {
    encoder: `${modelDir}/encoder-${modelSuffix}`,
    decoder: `${modelDir}/decoder-${modelSuffix}`,
    joiner: `${modelDir}/joiner-${modelSuffix}`,
  };

  // Keyword entries are joined with a newline into a single string.
  const keywordEntries = [
    'w én s ēn t è k ǎ s uǒ @文森特卡索',
    'f ǎ g uó @法国',
  ];

  return sherpa_onnx.createKws({
    modelConfig: {
      transducer,
      tokens: `${modelDir}/tokens.txt`,
    },
    keywords: keywordEntries.join('\n'),
  });
}

// Run the keyword spotter over a test wave file and print every keyword
// that was detected.
const kws = createKeywordSpotter();
const stream = kws.createStream();

try {
  const waveFilename =
    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';

  const wave = sherpa_onnx.readWave(waveFilename);
  stream.acceptWaveform(wave.sampleRate, wave.samples);

  // Append 0.4 seconds of zero samples after the audio. The buffer length is
  // derived from wave.sampleRate, so the same rate is reported when the
  // padding is submitted. This also avoids depending on
  // kws.config.featConfig being present.
  const tailPadding = new Float32Array(wave.sampleRate * 0.4);
  stream.acceptWaveform(wave.sampleRate, tailPadding);

  const detectedKeywords = [];
  while (kws.isReady(stream)) {
    kws.decode(stream);
    const keyword = kws.getResult(stream).keyword;
    // An empty string means no keyword was reported for this step.
    if (keyword !== '') {
      detectedKeywords.push(keyword);
    }
  }
  console.log(detectedKeywords);
} finally {
  // Always free both objects, even when reading or decoding throws.
  // NOTE(review): presumably free() releases memory held on the WebAssembly
  // side; confirm against the sherpa-onnx package.
  stream.free();
  kws.free();
}
Loading
Loading