Skip to content

Commit

Permalink
Add VAD and keyword spotting for the Node package with WebAssembly (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Aug 24, 2024
1 parent 537e163 commit 5ed8e31
Show file tree
Hide file tree
Showing 40 changed files with 456 additions and 524 deletions.
22 changes: 22 additions & 0 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,28 @@ git status
ls -lh
ls -lh node_modules

echo '-----vad+whisper----------'

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-with-non-streaming-asr-whisper.js
rm Obama.wav
rm silero_vad.onnx
rm -rf sherpa-onnx-whisper-tiny.en

echo "----------keyword spotting----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

node ./test-keyword-spotter-transducer.js
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

# offline asr
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
Expand Down
31 changes: 29 additions & 2 deletions .github/workflows/npm.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: npm

on:
push:
branches:
- npm
workflow_dispatch:

concurrency:
Expand All @@ -27,6 +30,9 @@ jobs:

- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
version: 3.1.51
actions-cache-folder: 'emsdk-cache'

- name: View emsdk version
shell: bash
Expand All @@ -51,8 +57,6 @@ jobs:
- name: Build nodejs package
shell: bash
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
./build-wasm-simd-nodejs.sh
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
Expand All @@ -71,6 +75,29 @@ jobs:
rm package.json.bak
- name: Collect files
shell: bash
run: |
dst=sherpa-onnx-wasm-nodejs
mkdir $dst
cp -v scripts/nodejs/* $dst
tar cvjf $dst.tar.bz2 $dst
echo "---"
ls -h $dst
- uses: actions/upload-artifact@v4
with:
name: sherpa-onnx-wasm-nodejs
path: ./*.tar.bz2

- name: Build nodejs package
shell: bash
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
cd scripts/nodejs
git diff
npm install
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/test-nodejs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ jobs:

- name: Install emsdk
uses: mymindstorm/setup-emsdk@v14
with:
version: 3.1.51
actions-cache-folder: 'emsdk-cache'

- name: View emsdk version
shell: bash
Expand Down Expand Up @@ -109,6 +112,7 @@ jobs:
node --version
npm --version
export d=scripts/nodejs
cat $d/index.js
pushd $d
npm install
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## 1.10.23

* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
* Use a separate thread to initialize models for lazarus examples. (#1270)
* Object pascal examples for recording and playing audio with portaudio. (#1271)
* Text to speech API for Object Pascal. (#1273)
* update kotlin api for better release native object and add user-friendly apis. (#1275)
* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
* Add WebAssembly for VAD (#1281)
* WebAssembly example for VAD + Non-streaming ASR (#1284)

## 1.10.22

* Add Pascal API for reading wave files (#1243)
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ project(sherpa-onnx)
# ./nodejs-addon-examples
# ./dart-api-examples/
# ./CHANGELOG.md
set(SHERPA_ONNX_VERSION "1.10.22")
set(SHERPA_ONNX_VERSION "1.10.23")

# Disable warning about
#
Expand Down Expand Up @@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
if(NOT SHERPA_ONNX_ENABLE_WASM)
message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
endif()
add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
endif()

if(SHERPA_ONNX_ENABLE_WASM)
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/add-punctuations/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/audio-tagging/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/keyword-spotter/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/speaker-identification/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
path: ^1.9.0
args: ^2.5.0

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/streaming_asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >
publish_to: 'none'

version: 1.10.22
version: 1.10.23

topics:
- speech-recognition
Expand All @@ -30,7 +30,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6

sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >
publish_to: 'none' # Remove this line if you wish to publish to pub.dev

version: 1.10.22
version: 1.10.23

environment:
sdk: '>=3.4.0 <4.0.0'
Expand All @@ -17,7 +17,7 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
sherpa_onnx: ^1.10.22
sherpa_onnx: ^1.10.23
url_launcher: ^6.2.6
audioplayers: ^5.0.0

Expand Down
12 changes: 6 additions & 6 deletions flutter/sherpa_onnx/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.10.22
version: 1.10.23

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand All @@ -30,23 +30,23 @@ dependencies:
flutter:
sdk: flutter

sherpa_onnx_android: ^1.10.22
sherpa_onnx_android: ^1.10.23
# sherpa_onnx_android:
# path: ../sherpa_onnx_android

sherpa_onnx_macos: ^1.10.22
sherpa_onnx_macos: ^1.10.23
# sherpa_onnx_macos:
# path: ../sherpa_onnx_macos

sherpa_onnx_linux: ^1.10.22
sherpa_onnx_linux: ^1.10.23
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows: ^1.10.22
sherpa_onnx_windows: ^1.10.23
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows

sherpa_onnx_ios: ^1.10.22
sherpa_onnx_ios: ^1.10.23
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios

Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
s.version = '1.10.22'
s.version = '1.10.23'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
s.version = '1.10.22'
s.version = '1.10.23'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
Expand Down
7 changes: 7 additions & 0 deletions new-release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

find flutter -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
2 changes: 1 addition & 1 deletion nodejs-addon-examples/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"dependencies": {
"sherpa-onnx-node": "^1.10.22"
"sherpa-onnx-node": "^1.10.23"
}
}
2 changes: 1 addition & 1 deletion nodejs-addon-examples/test_keyword_spotter_transducer.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

const detectedKeywords = [];
while (kws.isReady(stream)) {
kws.decode(stream);
const keyword = kws.getResult(stream).keyword;
if (keyword != '') {
detectedKeywords.push(keyword);
}
kws.decode(stream);
}
let stop = Date.now();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ console.log('Done')
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'secodns')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
real_time_factor.toFixed(3))
49 changes: 49 additions & 0 deletions nodejs-examples/test-keyword-spotter-transducer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx');

function createKeywordSpotter() {
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
const config = {
'modelConfig': {
'transducer': {
'encoder':
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
'decoder':
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
'joiner':
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
},
'tokens':
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
},
keywords: 'w én s ēn t è k ǎ s uǒ @文森特卡索\n' +
'f ǎ g uó @法国'
};

return sherpa_onnx.createKws(config);
}

const kws = createKeywordSpotter();
const stream = kws.createStream();
const waveFilename =
'./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';

const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform(kws.config.featConfig.sampleRate, tailPadding);

const detectedKeywords = [];
while (kws.isReady(stream)) {
kws.decode(stream);
const keyword = kws.getResult(stream).keyword;
if (keyword != '') {
detectedKeywords.push(keyword);
}
}
console.log(detectedKeywords);

stream.free();
kws.free();
Loading

0 comments on commit 5ed8e31

Please sign in to comment.