Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add WebAssembly for Kws #648

Merged
merged 19 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
Expand Down Expand Up @@ -135,6 +136,10 @@ if(SHERPA_ONNX_ENABLE_WASM)
add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1)
endif()

if(SHERPA_ONNX_ENABLE_WASM_KWS)
add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
endif()

if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
endif()
Expand Down
56 changes: 56 additions & 0 deletions build-wasm-simd-kws.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
#
# Configure, build and install the sherpa-onnx keyword-spotting (KWS)
# WebAssembly target with Emscripten.
#
# The Emscripten root is taken from $EMSCRIPTEN when it is set; otherwise it
# is derived from the location of `emcc` found on PATH.

# Abort at the first failing step so that a failed cmake/make/pushd is not
# followed by further build or install commands.
set -e

if [ -z "$EMSCRIPTEN" ]; then
  if ! command -v emcc &> /dev/null; then
    echo "Please install emscripten first"
    echo ""
    echo "You can use the following commands to install it:"
    echo ""
    echo "git clone https://github.com/emscripten-core/emsdk.git"
    echo "cd emsdk"
    echo "git pull"
    echo "./emsdk install latest"
    echo "./emsdk activate latest"
    echo "source ./emsdk_env.sh"
    exit 1
  fi

  # Resolve symlinks so that EMSCRIPTEN is the directory that really
  # contains emcc. Quoted so that paths containing spaces survive.
  EMSCRIPTEN=$(dirname "$(realpath "$(command -v emcc)")")
fi

export EMSCRIPTEN=$EMSCRIPTEN
echo "EMSCRIPTEN: $EMSCRIPTEN"

# The CMake toolchain file shipped with Emscripten is required below.
if [ ! -f "$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" ]; then
  echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
  echo "Please make sure you have installed emsdk correctly"
  exit 1
fi

mkdir -p build-wasm-simd-kws
pushd build-wasm-simd-kws

# wasm/kws/CMakeLists.txt reads this variable and refuses to configure
# unless it is set, i.e. unless the build was started from this script.
export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON

cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_TOOLCHAIN_FILE="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" \
  \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  -DSHERPA_ONNX_ENABLE_JNI=OFF \
  -DSHERPA_ONNX_ENABLE_C_API=ON \
  -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  -DSHERPA_ONNX_ENABLE_GPU=OFF \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_WASM_KWS=ON \
  -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  ..
make -j8
make install

# Show the installed artifacts.
ls -lh install/bin/wasm
7 changes: 3 additions & 4 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ SherpaOnnxKeywordSpotter* CreateKeywordSpotter(
SherpaOnnxKeywordSpotter* spotter = new SherpaOnnxKeywordSpotter;

spotter->impl =
std::make_unique<sherpa_onnx::KeywordSpotter>(spotter_config);
std::make_unique<sherpa_onnx::KeywordSpotter>(spotter_config);

return spotter;
}
Expand All @@ -493,7 +493,7 @@ void DestroyKeywordSpotter(SherpaOnnxKeywordSpotter* spotter) {
// Creates a stream from `spotter` (via spotter->impl->CreateStream()) and
// returns it wrapped in a SherpaOnnxOnlineStream allocated with `new`.
//
// The returned pointer is a raw, heap-allocated object; it has to be released
// by the caller (presumably through the matching Destroy* function of this
// C API -- confirm against c-api.h).
SherpaOnnxOnlineStream* CreateKeywordStream(
    const SherpaOnnxKeywordSpotter* spotter) {
  return new SherpaOnnxOnlineStream(spotter->impl->CreateStream());
}

Expand All @@ -512,7 +512,7 @@ void DecodeMultipleKeywordStreams(
int32_t n) {
std::vector<sherpa_onnx::OnlineStream*> ss(n);
for (int32_t i = 0; i != n; ++i) {
ss[i] = streams[i]->impl.get();
ss[i] = streams[i]->impl.get();
}
spotter->impl->DecodeStreams(ss.data(), n);
}
Expand Down Expand Up @@ -593,7 +593,6 @@ void DestroyKeywordResult(const SherpaOnnxKeywordResult *r) {
}
}


// ============================================================
// For VAD
// ============================================================
Expand Down
9 changes: 8 additions & 1 deletion sherpa-onnx/csrc/keyword-spotter-transducer-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,22 @@ class KeywordSpotterTransducerImpl : public KeywordSpotterImpl {
}

void InitKeywords() {
#ifdef SHERPA_ONNX_ENABLE_WASM_KWS
// Due to the limitations of the wasm file system,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you pass a keyword file from wasm?

We have been doing this for model files, such as tokens.txt.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but then everything has to be recompiled whenever the keywords are modified, which is very inconvenient. Because tokens.txt and keywords.txt are both packaged into the sherpa-onnx-wasm-kws-main.data file, they cannot be edited afterwards; the .data file can only be regenerated by recompiling. What do you think?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. Then please keep using your current approach.

// the keywords_file value is parsed directly as the keyword text itself
// (not as a path) when WASM KWS is enabled
std::istringstream is(config_.keywords_file);
InitKeywords(is);
#else
// each line in keywords_file contains space-separated words

std::ifstream is(config_.keywords_file);
if (!is) {
SHERPA_ONNX_LOGE("Open keywords file failed: %s",
config_.keywords_file.c_str());
exit(-1);
}
InitKeywords(is);
#endif
}

#if __ANDROID_API__ >= 9
Expand Down
7 changes: 7 additions & 0 deletions sherpa-onnx/csrc/keyword-spotter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,17 @@ bool KeywordSpotterConfig::Validate() const {
SHERPA_ONNX_LOGE("Please provide --keywords-file.");
return false;
}

#ifndef SHERPA_ONNX_ENABLE_WASM_KWS
// This file-existence check is compiled out for WASM KWS builds: due to the
// limitations of the wasm file system, a keywords file would have to be
// packaged into the sherpa-onnx-wasm-kws-main.data file, so in those builds
// keywords_file is parsed directly as a string of keywords, not as a path.
if (!std::ifstream(keywords_file.c_str()).good()) {
SHERPA_ONNX_LOGE("Keywords file %s does not exist.", keywords_file.c_str());
return false;
}
#endif

return model_config.Validate();
}
Expand Down
4 changes: 1 addition & 3 deletions sherpa-onnx/csrc/transducer-keyword-decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,14 @@
//
// Copyright (c) 2023-2024 Xiaomi Corporation

#include "sherpa-onnx/csrc/transducer-keyword-decoder.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/transducer-keyword-decoder.h"

namespace sherpa_onnx {

Expand Down
4 changes: 4 additions & 0 deletions wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR)
add_subdirectory(asr)
endif()

if(SHERPA_ONNX_ENABLE_WASM_KWS)
add_subdirectory(kws)
endif()

if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
add_subdirectory(nodejs)
endif()
54 changes: 54 additions & 0 deletions wasm/kws/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Build rules for the keyword-spotting (KWS) WebAssembly demo.

# build-wasm-simd-kws.sh exports SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON.
# The expansion is quoted so that an unset variable becomes the empty string
# (a false constant) and the message below is printed, instead of if()
# receiving a bare "NOT" with no operand.
if(NOT "$ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}")
  message(FATAL_ERROR "Please use ./build-wasm-simd-kws.sh to build for wasm KWS")
endif()

# The model files must be placed in assets/ before configuring; the decoder
# model is used as the marker for their presence.
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx")
  message(WARNING "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx does not exist")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

# C API functions that have to stay callable from JavaScript.
set(exported_functions
  AcceptWaveform
  CreateKeywordSpotter
  DestroyKeywordSpotter
  CreateKeywordStream
  DecodeKeywordStream
  GetKeywordResult
  DestroyKeywordResult
  IsKeywordStreamReady
  InputFinished
)

# -sEXPORTED_FUNCTIONS takes each symbol with a leading underscore.
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()

list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})

# Emscripten flags: 512MB initial heap that may grow, a 10485760-byte stack,
# the exported symbols/runtime helpers, and the assets/ directory preloaded
# at "." of the virtual file system.
set(MY_FLAGS "-s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ")
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
# CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking
# executables; the previous spelling (CMAKE_EXECUTBLE_LINKER_FLAGS) named a
# variable that CMake never consults.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")

add_executable(sherpa-onnx-wasm-kws-main sherpa-onnx-wasm-main-kws.cc)
target_link_libraries(sherpa-onnx-wasm-kws-main sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-kws-main DESTINATION bin/wasm)

# Install the web front end together with the generated .js/.wasm/.data
# files that sit next to the built target.
install(
  FILES
    "sherpa-onnx-kws.js"
    "app.js"
    "index.html"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.data"
  DESTINATION
    bin/wasm
)
Loading
Loading