From b5cdc99a3fc5592e4207cebb349f9c2a40fbc2f8 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 2 Mar 2024 23:52:33 +0800 Subject: [PATCH] Add TTS WebAssembly for NodeJS. --- CMakeLists.txt | 8 ++ build-wasm-simd-nodejs.sh | 63 +++++++++++ wasm/CMakeLists.txt | 4 + wasm/asr/sherpa-onnx-wasm-main-asr.cc | 2 +- wasm/nodejs/CMakeLists.txt | 66 +++++++++++ wasm/nodejs/sherpa-onnx-wasm-nodejs.cc | 40 +++++++ wasm/tts/app-tts.js | 2 +- wasm/tts/sherpa-onnx-tts.js | 148 +++++++++++++++---------- wasm/tts/sherpa-onnx-wasm-main-tts.cc | 2 +- 9 files changed, 272 insertions(+), 63 deletions(-) create mode 100755 build-wasm-simd-nodejs.sh create mode 100644 wasm/nodejs/CMakeLists.txt create mode 100644 wasm/nodejs/sherpa-onnx-wasm-nodejs.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e8e67e69..e890bfb7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) +option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. 
Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) @@ -108,6 +109,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") +message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") if(SHERPA_ONNX_ENABLE_WASM_TTS) if(NOT SHERPA_ONNX_ENABLE_WASM) @@ -121,6 +123,12 @@ if(SHERPA_ONNX_ENABLE_WASM_ASR) endif() endif() +if(SHERPA_ONNX_ENABLE_WASM_NODEJS) + if(NOT SHERPA_ONNX_ENABLE_WASM) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS") + endif() +endif() + if(SHERPA_ONNX_ENABLE_WASM) add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1) endif() diff --git a/build-wasm-simd-nodejs.sh b/build-wasm-simd-nodejs.sh new file mode 100755 index 000000000..21a3b25da --- /dev/null +++ b/build-wasm-simd-nodejs.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Copyright (c) 2024 Xiaomi Corporation +# +# This script is to build sherpa-onnx for WebAssembly (NodeJS) +# +# Please use NodeJS >= 18 + +set -ex + +if [ x"$EMSCRIPTEN" == x"" ]; then + if ! command -v emcc &> /dev/null; then + echo "Please install emscripten first" + echo "" + echo "You can use the following commands to install it:" + echo "" + echo "git clone https://github.com/emscripten-core/emsdk.git" + echo "cd emsdk" + echo "git pull" + echo "./emsdk install latest" + echo "./emsdk activate latest" + echo "source ./emsdk_env.sh" + exit 1 + else + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) + fi +fi + +export EMSCRIPTEN=$EMSCRIPTEN +echo "EMSCRIPTEN: $EMSCRIPTEN" +if [ ! 
-f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" + echo "Please make sure you have installed emsdk correctly" + exit 1 +fi + +mkdir -p build-wasm-simd-nodejs +pushd build-wasm-simd-nodejs + +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON + +cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ + \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DSHERPA_ONNX_ENABLE_GPU=OFF \ + -DSHERPA_ONNX_ENABLE_WASM=ON \ + -DSHERPA_ONNX_ENABLE_WASM_NODEJS=ON \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \ + .. +make -j10 +make install + +ls -lh install/bin/wasm/nodejs diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index dc077a23d..c5d283f19 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -5,3 +5,7 @@ endif() if(SHERPA_ONNX_ENABLE_WASM_ASR) add_subdirectory(asr) endif() + +if(SHERPA_ONNX_ENABLE_WASM_NODEJS) + add_subdirectory(nodejs) +endif() diff --git a/wasm/asr/sherpa-onnx-wasm-main-asr.cc b/wasm/asr/sherpa-onnx-wasm-main-asr.cc index 236766312..951391e14 100644 --- a/wasm/asr/sherpa-onnx-wasm-main-asr.cc +++ b/wasm/asr/sherpa-onnx-wasm-main-asr.cc @@ -1,4 +1,4 @@ -// wasm/sherpa-onnx-wasm-asr-main.cc +// wasm/sherpa-onnx-wasm-main-asr.cc // // Copyright (c) 2024 Xiaomi Corporation #include diff --git a/wasm/nodejs/CMakeLists.txt b/wasm/nodejs/CMakeLists.txt new file mode 100644 index 000000000..a9b78ff09 --- /dev/null +++ b/wasm/nodejs/CMakeLists.txt @@ -0,0 +1,66 @@ +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) + message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS") 
+endif() + +set(exported_functions + MyPrintOfflineTtsConfig + MyPrint + #tts + SherpaOnnxCreateOfflineTts + SherpaOnnxDestroyOfflineTts + SherpaOnnxDestroyOfflineTtsGeneratedAudio + SherpaOnnxOfflineTtsGenerate + SherpaOnnxOfflineTtsGenerateWithCallback + SherpaOnnxOfflineTtsNumSpeakers + SherpaOnnxOfflineTtsSampleRate + SherpaOnnxWriteWave + # streaming asr + AcceptWaveform + CreateOnlineRecognizer + CreateOnlineStream + DecodeOnlineStream + DestroyOnlineRecognizer + DestroyOnlineRecognizerResult + DestroyOnlineStream + GetOnlineStreamResult + InputFinished + IsEndpoint + IsOnlineStreamReady + Reset + # non-streaming ASR +) + +set(mangled_exported_functions) +foreach(x IN LISTS exported_functions) + list(APPEND mangled_exported_functions "_${x}") +endforeach() +list(JOIN mangled_exported_functions "," all_exported_functions) + +include_directories(${CMAKE_SOURCE_DIR}) +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue'] ") # NOTE(review): the same setting is appended again two lines below with a longer list; presumably the later one wins, confirm and drop one +string(APPEND MY_FLAGS " -sNODERAWFS=1 ") +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") +string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ") + +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}") # CMAKE_EXE_LINKER_FLAGS is the variable CMake uses when linking executables + +add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc) +target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api) +install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs) + +install( + FILES + 
${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js + ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" + DESTINATION + bin/wasm/nodejs +) diff --git a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc new file mode 100644 index 000000000..fc628cdc2 --- /dev/null +++ b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc @@ -0,0 +1,40 @@ +// wasm/nodejs/sherpa-onnx-wasm-nodejs.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include <stdio.h> + +#include <algorithm> +#include <cstdint> + +#include "sherpa-onnx/c-api/c-api.h" + +extern "C" { + +void MyPrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) { // Prints the fields of *tts_config listed below to stdout. + auto tts_model_config = &tts_config->model; + auto vits_model_config = &tts_model_config->vits; + fprintf(stdout, "----------vits model config----------\n"); + fprintf(stdout, "model: %s\n", vits_model_config->model); + fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); + fprintf(stdout, "tokens: %s\n", vits_model_config->tokens); + fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir); + fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale); + fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w); + fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); + + fprintf(stdout, "----------tts model config----------\n"); + fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); + fprintf(stdout, "debug: %d\n", tts_model_config->debug); + fprintf(stdout, "provider: %s\n", tts_model_config->provider); + + fprintf(stdout, "----------tts config----------\n"); + fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); + fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); +} + +void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {} // Empty body: prints nothing. + +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { // Copies num_bytes bytes starting at src to dst. + std::copy(src, src + num_bytes, dst); +} +} // extern "C" diff --git a/wasm/tts/app-tts.js b/wasm/tts/app-tts.js index 
8359080c2..1da37a9ef 100644 --- a/wasm/tts/app-tts.js +++ b/wasm/tts/app-tts.js @@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() { console.log('Model files downloaded!'); console.log('Initializing tts ......'); - tts = initSherpaOnnxOfflineTts() + tts = initSherpaOnnxOfflineTts(Module) if (tts.numSpeakers > 1) { speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`; } diff --git a/wasm/tts/sherpa-onnx-tts.js b/wasm/tts/sherpa-onnx-tts.js index f32b09a12..0c6cde533 100644 --- a/wasm/tts/sherpa-onnx-tts.js +++ b/wasm/tts/sherpa-onnx-tts.js @@ -1,109 +1,109 @@ -function freeConfig(config) { +function freeConfig(config, Module) { if ('buffer' in config) { - _free(config.buffer); + Module._free(config.buffer); } if ('config' in config) { - freeConfig(config.config) + freeConfig(config.config, Module) } - _free(config.ptr); + Module._free(config.ptr); } // The user should free the returned pointers -function initSherpaOnnxOfflineTtsVitsModelConfig(config) { - let modelLen = lengthBytesUTF8(config.model) + 1; - let lexiconLen = lengthBytesUTF8(config.lexicon) + 1; - let tokensLen = lengthBytesUTF8(config.tokens) + 1; - let dataDirLen = lengthBytesUTF8(config.dataDir) + 1; +function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { + let modelLen = Module.lengthBytesUTF8(config.model) + 1; + let lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1; + let tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; + let dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1; let n = modelLen + lexiconLen + tokensLen + dataDirLen; - let buffer = _malloc(n); + let buffer = Module._malloc(n); let len = 7 * 4; - let ptr = _malloc(len); + let ptr = Module._malloc(len); let offset = 0; - stringToUTF8(config.model, buffer + offset, modelLen); + Module.stringToUTF8(config.model, buffer + offset, modelLen); offset += modelLen; - stringToUTF8(config.lexicon, buffer + offset, lexiconLen); + Module.stringToUTF8(config.lexicon, buffer + offset, 
lexiconLen); offset += lexiconLen; - stringToUTF8(config.tokens, buffer + offset, tokensLen); + Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); offset += tokensLen; - stringToUTF8(config.dataDir, buffer + offset, dataDirLen); + Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen); offset += dataDirLen; offset = 0; - setValue(ptr, buffer + offset, 'i8*'); + Module.setValue(ptr, buffer + offset, 'i8*'); offset += modelLen; - setValue(ptr + 4, buffer + offset, 'i8*'); + Module.setValue(ptr + 4, buffer + offset, 'i8*'); offset += lexiconLen; - setValue(ptr + 8, buffer + offset, 'i8*'); + Module.setValue(ptr + 8, buffer + offset, 'i8*'); offset += tokensLen; - setValue(ptr + 12, buffer + offset, 'i8*'); + Module.setValue(ptr + 12, buffer + offset, 'i8*'); offset += dataDirLen; - setValue(ptr + 16, config.noiseScale, 'float'); - setValue(ptr + 20, config.noiseScaleW, 'float'); - setValue(ptr + 24, config.lengthScale, 'float'); + Module.setValue(ptr + 16, config.noiseScale, 'float'); + Module.setValue(ptr + 20, config.noiseScaleW, 'float'); + Module.setValue(ptr + 24, config.lengthScale, 'float'); return { buffer: buffer, ptr: ptr, len: len, } } -function initSherpaOnnxOfflineTtsModelConfig(config) { - let vitsModelConfig = - initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig); +function initSherpaOnnxOfflineTtsModelConfig(config, Module) { + let vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig( + config.offlineTtsVitsModelConfig, Module); let len = vitsModelConfig.len + 3 * 4; - let ptr = _malloc(len); + let ptr = Module._malloc(len); let offset = 0; - _CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); + Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); offset += vitsModelConfig.len; - setValue(ptr + offset, config.numThreads, 'i32'); + Module.setValue(ptr + offset, config.numThreads, 'i32'); offset += 4; - setValue(ptr + offset, config.debug, 'i32'); + Module.setValue(ptr 
+ offset, config.debug, 'i32'); offset += 4; - let providerLen = lengthBytesUTF8(config.provider) + 1; - let buffer = _malloc(providerLen); - stringToUTF8(config.provider, buffer, providerLen); - setValue(ptr + offset, buffer, 'i8*'); + let providerLen = Module.lengthBytesUTF8(config.provider) + 1; + let buffer = Module._malloc(providerLen); + Module.stringToUTF8(config.provider, buffer, providerLen); + Module.setValue(ptr + offset, buffer, 'i8*'); return { buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, } } -function initSherpaOnnxOfflineTtsConfig(config) { +function initSherpaOnnxOfflineTtsConfig(config, Module) { let modelConfig = - initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig); + initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); let len = modelConfig.len + 2 * 4; - let ptr = _malloc(len); + let ptr = Module._malloc(len); let offset = 0; - _CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); offset += modelConfig.len; - let ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1; - let buffer = _malloc(ruleFstsLen); - stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); - setValue(ptr + offset, buffer, 'i8*'); + let ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1; + let buffer = Module._malloc(ruleFstsLen); + Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); + Module.setValue(ptr + offset, buffer, 'i8*'); offset += 4; - setValue(ptr + offset, config.maxNumSentences, 'i32'); + Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); return { buffer: buffer, ptr: ptr, len: len, config: modelConfig, @@ -111,19 +111,21 @@ function initSherpaOnnxOfflineTtsConfig(config) { } class OfflineTts { - constructor(configObj) { - let config = initSherpaOnnxOfflineTtsConfig(configObj) - let handle = _SherpaOnnxCreateOfflineTts(config.ptr); + constructor(configObj, Module) { + console.log(configObj) + let config = 
initSherpaOnnxOfflineTtsConfig(configObj, Module) + let handle = Module._SherpaOnnxCreateOfflineTts(config.ptr); - freeConfig(config); + freeConfig(config, Module); this.handle = handle; - this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle); - this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle); + this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle); + this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle); + this.Module = Module } free() { - _SherpaOnnxDestroyOfflineTts(this.handle); + this.Module._SherpaOnnxDestroyOfflineTts(this.handle); this.handle = 0 } @@ -133,28 +135,44 @@ class OfflineTts { // speed: 1.0 // } generate(config) { - let textLen = lengthBytesUTF8(config.text) + 1; - let textPtr = _malloc(textLen); - stringToUTF8(config.text, textPtr, textLen); + let textLen = this.Module.lengthBytesUTF8(config.text) + 1; + let textPtr = this.Module._malloc(textLen); + this.Module.stringToUTF8(config.text, textPtr, textLen); - let h = _SherpaOnnxOfflineTtsGenerate( + let h = this.Module._SherpaOnnxOfflineTtsGenerate( this.handle, textPtr, config.sid, config.speed); - let numSamples = HEAP32[h / 4 + 1]; - let sampleRate = HEAP32[h / 4 + 2]; + let numSamples = this.Module.HEAP32[h / 4 + 1]; + let sampleRate = this.Module.HEAP32[h / 4 + 2]; - let samplesPtr = HEAP32[h / 4] / 4; + let samplesPtr = this.Module.HEAP32[h / 4] / 4; let samples = new Float32Array(numSamples); for (let i = 0; i < numSamples; i++) { - samples[i] = HEAPF32[samplesPtr + i]; + samples[i] = this.Module.HEAPF32[samplesPtr + i]; } - _SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); + this.Module._free(textPtr);  // release the UTF-8 copy of config.text allocated above return {samples: samples, sampleRate: sampleRate}; } + save(filename, audio) { + let samples = audio.samples; + let sampleRate = audio.sampleRate; + let ptr = this.Module._malloc(samples.length * 4); + for (let i = 0; i < samples.length; i++) { + this.Module.HEAPF32[ptr / 4 + i] = samples[i]; + } + + let 
filenameLen = this.Module.lengthBytesUTF8(filename) + 1; + let buffer = this.Module._malloc(filenameLen); + this.Module.stringToUTF8(filename, buffer, filenameLen); + this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer); + this.Module._free(buffer); + this.Module._free(ptr); + } } -function initSherpaOnnxOfflineTts() { +function initSherpaOnnxOfflineTts(Module, myConfig) { let offlineTtsVitsModelConfig = { model: './model.onnx', lexicon: '', @@ -176,5 +194,16 @@ function initSherpaOnnxOfflineTts() { maxNumSentences: 1, } - return new OfflineTts(offlineTtsConfig); + if (myConfig) { + offlineTtsConfig = myConfig; + } + + return new OfflineTts(offlineTtsConfig, Module); +} + +if (typeof process == 'object' && typeof process.versions == 'object' && + typeof process.versions.node == 'string') { + module.exports = { + initSherpaOnnxOfflineTts, + }; } diff --git a/wasm/tts/sherpa-onnx-wasm-main-tts.cc b/wasm/tts/sherpa-onnx-wasm-main-tts.cc index 2441ae98f..71701419c 100644 --- a/wasm/tts/sherpa-onnx-wasm-main-tts.cc +++ b/wasm/tts/sherpa-onnx-wasm-main-tts.cc @@ -1,4 +1,4 @@ -// wasm/sherpa-onnx-wasm-main.cc +// wasm/sherpa-onnx-wasm-main-tts.cc // // Copyright (c) 2024 Xiaomi Corporation #include