Add VAD and keyword spotting for the Node package with WebAssembly (#1286)
k2-fsa · Aug 24, 2024 · 5ed8e31 · 5ed8e31
1 parent 537e163
commit 5ed8e31
Show file tree

Hide file tree

Showing 40 changed files with 456 additions and 524 deletions.
diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh
@@ -9,6 +9,28 @@ git status
 ls -lh
 ls -lh node_modules
 
+echo '-----vad+whisper----------'
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+node ./test-vad-with-non-streaming-asr-whisper.js
+rm Obama.wav
+rm silero_vad.onnx
+rm -rf sherpa-onnx-whisper-tiny.en
+
+echo "----------keyword spotting----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+
+node ./test-keyword-spotter-transducer.js
+rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
+
 # offline asr
 #
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml
@@ -1,6 +1,9 @@
 name: npm
 
 on:
+  push:
+    branches:
+      - npm
   workflow_dispatch:
 
 concurrency:
@@ -27,6 +30,9 @@ jobs:
 
       - name: Install emsdk
         uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'
 
       - name: View emsdk version
         shell: bash
@@ -51,8 +57,6 @@ jobs:
 
       - name: Build nodejs package
         shell: bash
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
         run: |
           ./build-wasm-simd-nodejs.sh
           cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
@@ -71,6 +75,29 @@ jobs:
 
           rm package.json.bak
 
+      - name: Collect files
+        shell: bash
+        run: |
+          dst=sherpa-onnx-wasm-nodejs
+          mkdir $dst
+          cp -v scripts/nodejs/* $dst
+          tar cvjf $dst.tar.bz2 $dst
+
+          echo "---"
+          ls -h $dst
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: sherpa-onnx-wasm-nodejs
+          path: ./*.tar.bz2
+
+      - name: Build nodejs package
+        shell: bash
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+        run: |
+          cd scripts/nodejs
+
           git diff
 
           npm install

diff --git a/.github/workflows/test-nodejs.yaml b/.github/workflows/test-nodejs.yaml
@@ -55,6 +55,9 @@ jobs:
 
       - name: Install emsdk
         uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'
 
       - name: View emsdk version
         shell: bash
@@ -109,6 +112,7 @@ jobs:
           node --version
           npm --version
           export d=scripts/nodejs
+          cat $d/index.js
 
           pushd $d
           npm install

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,14 @@
+## 1.10.23
+
+* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
+* Use a separate thread to initialize models for lazarus examples. (#1270)
+* Object pascal examples for recording and playing audio with portaudio. (#1271)
+* Text to speech API for Object Pascal. (#1273)
+* update kotlin api for better release native object and add user-friendly apis. (#1275)
+* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
+* Add WebAssembly for VAD (#1281)
+* WebAssembly example for VAD + Non-streaming ASR (#1284)
+
 ## 1.10.22
 
 * Add Pascal API for reading wave files (#1243)

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -11,7 +11,7 @@ project(sherpa-onnx)
 # ./nodejs-addon-examples
 # ./dart-api-examples/
 # ./CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.22")
+set(SHERPA_ONNX_VERSION "1.10.23")
 
 # Disable warning about
 #
@@ -206,6 +206,7 @@ if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
   if(NOT SHERPA_ONNX_ENABLE_WASM)
     message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
   endif()
+  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
 endif()
 
 if(SHERPA_ONNX_ENABLE_WASM)

diff --git a/dart-api-examples/add-punctuations/pubspec.yaml b/dart-api-examples/add-punctuations/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/audio-tagging/pubspec.yaml b/dart-api-examples/audio-tagging/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/keyword-spotter/pubspec.yaml b/dart-api-examples/keyword-spotter/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   # sherpa_onnx:
   #   path: ../../flutter/sherpa_onnx
   path: ^1.9.0

diff --git a/dart-api-examples/non-streaming-asr/pubspec.yaml b/dart-api-examples/non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:
 
 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/speaker-identification/pubspec.yaml b/dart-api-examples/speaker-identification/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/streaming-asr/pubspec.yaml b/dart-api-examples/streaming-asr/pubspec.yaml
@@ -11,7 +11,7 @@ environment:
 
 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/tts/pubspec.yaml b/dart-api-examples/tts/pubspec.yaml
@@ -8,7 +8,7 @@ environment:
 
 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml b/dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/dart-api-examples/vad/pubspec.yaml b/dart-api-examples/vad/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
   sdk: ^3.4.0
 
 dependencies:
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   path: ^1.9.0
   args: ^2.5.0
 

diff --git a/flutter-examples/streaming_asr/pubspec.yaml b/flutter-examples/streaming_asr/pubspec.yaml
@@ -5,7 +5,7 @@ description: >
 
 publish_to: 'none'
 
-version: 1.10.22
+version: 1.10.23
 
 topics:
   - speech-recognition
@@ -30,7 +30,7 @@ dependencies:
   record: ^5.1.0
   url_launcher: ^6.2.6
 
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   # sherpa_onnx:
     # path: ../../flutter/sherpa_onnx
 

diff --git a/flutter-examples/tts/pubspec.yaml b/flutter-examples/tts/pubspec.yaml
@@ -5,7 +5,7 @@ description: >
 
 publish_to: 'none' # Remove this line if you wish to publish to pub.dev
 
-version: 1.10.22
+version: 1.10.23
 
 environment:
   sdk: '>=3.4.0 <4.0.0'
@@ -17,7 +17,7 @@ dependencies:
   cupertino_icons: ^1.0.6
   path_provider: ^2.1.3
   path: ^1.9.0
-  sherpa_onnx: ^1.10.22
+  sherpa_onnx: ^1.10.23
   url_launcher: ^6.2.6
   audioplayers: ^5.0.0
 

diff --git a/flutter/sherpa_onnx/pubspec.yaml b/flutter/sherpa_onnx/pubspec.yaml
@@ -17,7 +17,7 @@ topics:
   - voice-activity-detection
 
 # remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
-version: 1.10.22
+version: 1.10.23
 
 homepage: https://github.com/k2-fsa/sherpa-onnx
 
@@ -30,23 +30,23 @@ dependencies:
   flutter:
     sdk: flutter
 
-  sherpa_onnx_android: ^1.10.22
+  sherpa_onnx_android: ^1.10.23
   # sherpa_onnx_android:
   #   path: ../sherpa_onnx_android
 
-  sherpa_onnx_macos: ^1.10.22
+  sherpa_onnx_macos: ^1.10.23
   # sherpa_onnx_macos:
   #   path: ../sherpa_onnx_macos
 
-  sherpa_onnx_linux: ^1.10.22
+  sherpa_onnx_linux: ^1.10.23
   # sherpa_onnx_linux:
   #   path: ../sherpa_onnx_linux
     #
-  sherpa_onnx_windows: ^1.10.22
+  sherpa_onnx_windows: ^1.10.23
   # sherpa_onnx_windows:
   #   path: ../sherpa_onnx_windows
 
-  sherpa_onnx_ios: ^1.10.22
+  sherpa_onnx_ios: ^1.10.23
   # sherpa_onnx_ios:
   #   path: ../sherpa_onnx_ios
 

diff --git a/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec b/flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
@@ -7,7 +7,7 @@
 # https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
 Pod::Spec.new do |s|
   s.name             = 'sherpa_onnx_ios'
-  s.version          = '1.10.22'
+  s.version          = '1.10.23'
   s.summary          = 'A new Flutter FFI plugin project.'
   s.description      = <<-DESC
 A new Flutter FFI plugin project.

diff --git a/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec b/flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
@@ -4,7 +4,7 @@
 #
 Pod::Spec.new do |s|
   s.name             = 'sherpa_onnx_macos'
-  s.version          = '1.10.22'
+  s.version          = '1.10.23'
   s.summary          = 'sherpa-onnx Flutter FFI plugin project.'
   s.description      = <<-DESC
 sherpa-onnx Flutter FFI plugin project.

diff --git a/new-release.sh b/new-release.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+#
+# Rewrite the version string 1.10.22 to 1.10.23 in the flutter, dart, and
+# nodejs-addon manifests. sed -i.bak keeps a .bak copy next to each edited file.
+#
+# The -name patterns are quoted so that find receives them verbatim; unquoted,
+# the shell may expand them against the current directory before find runs.
+
+find flutter -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find dart-api-examples -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find flutter-examples -name '*.yaml' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find flutter -name '*.podspec' -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
+find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.22/1\.10\.23/g' {} \;
diff --git a/nodejs-addon-examples/package.json b/nodejs-addon-examples/package.json
@@ -1,5 +1,5 @@
 {
   "dependencies": {
-    "sherpa-onnx-node": "^1.10.22"
+    "sherpa-onnx-node": "^1.10.23"
   }
 }
diff --git a/nodejs-addon-examples/test_keyword_spotter_transducer.js b/nodejs-addon-examples/test_keyword_spotter_transducer.js
@@ -42,11 +42,11 @@ stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
 
 const detectedKeywords = [];
 while (kws.isReady(stream)) {
+  kws.decode(stream);
   const keyword = kws.getResult(stream).keyword;
   if (keyword != '') {
     detectedKeywords.push(keyword);
   }
-  kws.decode(stream);
 }
 let stop = Date.now();
 

diff --git a/nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js b/nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
@@ -120,8 +120,8 @@ console.log('Done')
 const elapsed_seconds = (stop - start) / 1000;
 const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
-console.log('Wave duration', duration.toFixed(3), 'secodns')
-console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
 console.log(
     `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
     real_time_factor.toFixed(3))
diff --git a/nodejs-examples/test-keyword-spotter-transducer.js b/nodejs-examples/test-keyword-spotter-transducer.js
@@ -0,0 +1,63 @@
+// Copyright (c)  2024  Xiaomi Corporation
+//
+// Detect keywords in a wave file using the sherpa-onnx Node package.
+const sherpa_onnx = require('sherpa-onnx');
+
+/**
+ * Build the keyword spotter from the transducer model files in the
+ * ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 directory.
+ *
+ * @returns whatever sherpa_onnx.createKws() returns for this config.
+ */
+function createKeywordSpotter() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+  const config = {
+    'modelConfig': {
+      'transducer': {
+        'encoder':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+        'decoder':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+        'joiner':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
+      },
+      'tokens':
+          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
+    },
+    // Two entries separated by '\n'; presumably '@' introduces the label reported on a hit (confirm).
+    keywords: 'w én s ēn t è k ǎ s uǒ  @文森特卡索\n' +
+        'f ǎ g uó @法国'
+  };
+
+  return sherpa_onnx.createKws(config);
+}
+
+const kws = createKeywordSpotter();
+const stream = kws.createStream();
+const waveFilename =
+    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';
+
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
+
+// Feed 0.4 * sampleRate zero samples after the audio (presumably so the final
+// frames get decoded; confirm). The buffer length is derived from
+// wave.sampleRate, so the same rate is passed along with it.
+const tailPadding = new Float32Array(wave.sampleRate * 0.4);
+stream.acceptWaveform(wave.sampleRate, tailPadding);
+
+const detectedKeywords = [];
+while (kws.isReady(stream)) {
+  // Decode before reading the result so each iteration sees fresh output.
+  kws.decode(stream);
+  const keyword = kws.getResult(stream).keyword;
+  if (keyword !== '') {
+    detectedKeywords.push(keyword);
+  }
+}
+console.log(detectedKeywords);
+
+// Explicitly free the stream first, then the spotter.
+stream.free();
+kws.free();