Skip to content

Commit

Permalink
Support heteronyms in Chinese TTS (#738)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Apr 8, 2024
1 parent c1c0f5b commit a5f8fbc
Show file tree
Hide file tree
Showing 49 changed files with 308 additions and 143 deletions.
8 changes: 4 additions & 4 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-en.js
rm vits-piper-en_US-amy-low.tar.bz2
rm vits-piper-en_US-amy-low*

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
rm vits-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3*
1 change: 1 addition & 0 deletions .github/workflows/arm-linux-gnueabihf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ jobs:
rm -v $dst/lib/libasound.so
rm -v $dst/lib/libonnxruntime.so
rm -v $dst/lib/libsherpa-onnx-fst.so
rm -v $dst/lib/libsherpa-onnx-fstfar.so
fi
tree $dst
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/riscv64-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ jobs:
rm -fv $dst/lib/libasound.so
rm -fv $dst/lib/libonnxruntime.so
rm -fv $dst/lib/libsherpa-onnx-fst.so
rm -fv $dst/lib/libsherpa-onnx-fstfar.so
fi
tree $dst
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,11 @@ jobs:
rm -rf vits-vctk
echo "Test vits-zh-aishell3"
git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2
./run-vits-zh-aishell3.sh
rm -rf vits-zh-aishell3
rm -rf vits-icefall-zh-aishell3
echo "Test vits-piper-en_US-lessac-medium"
git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,4 @@ sherpa-onnx-paraformer-trilingual-zh-cantonese-en
sr-data
*xcworkspace/xcuserdata/*

vits-icefall-*
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.9.16")
set(SHERPA_ONNX_VERSION "1.9.17")

# Disable warning about
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() {
var modelDir: String?
var modelName: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
var dataDir: String?
var assets: AssetManager? = application.assets
Expand All @@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() {
modelDir = null
modelName = null
ruleFsts = null
ruleFars = null
lexicon = null
dataDir = null

Expand All @@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() {
// dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"

// Example 3:
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"

// Example 4:
Expand All @@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() {
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: "",
)!!

tts = OfflineTts(assetManager = assets, config = config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ data class OfflineTtsModelConfig(
data class OfflineTtsConfig(
var model: OfflineTtsModelConfig,
var ruleFsts: String = "",
var ruleFars: String = "",
var maxNumSentences: Int = 1,
)

Expand Down Expand Up @@ -151,7 +152,8 @@ fun getOfflineTtsConfig(
modelName: String,
lexicon: String,
dataDir: String,
ruleFsts: String
ruleFsts: String,
ruleFars: String
): OfflineTtsConfig? {
return OfflineTtsConfig(
model = OfflineTtsModelConfig(
Expand All @@ -166,5 +168,6 @@ fun getOfflineTtsConfig(
provider = "cpu",
),
ruleFsts = ruleFsts,
ruleFars = ruleFars,
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ object TtsEngine {
private var modelDir: String? = null
private var modelName: String? = null
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
private var dataDir: String? = null
private var assets: AssetManager? = null
Expand All @@ -50,6 +51,7 @@ object TtsEngine {
modelDir = null
modelName = null
ruleFsts = null
ruleFars = null
lexicon = null
dataDir = null
lang = null
Expand All @@ -73,9 +75,10 @@ object TtsEngine {

// Example 3:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"

Expand Down Expand Up @@ -108,7 +111,8 @@ object TtsEngine {
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: ""
)!!

tts = OfflineTts(assetManager = assets, config = config)
Expand Down
3 changes: 3 additions & 0 deletions build-ios.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ echo "Generate xcframework"

mkdir -p "build/simulator/lib"
for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \
libsherpa-onnx-fstfar.a \
libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a \
libucd.a libpiper_phonemize.a libespeak-ng.a; do
lipo -create build/simulator_arm64/lib/${f} \
Expand All @@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \
build/simulator/lib/libkaldi-native-fbank-core.a \
build/simulator/lib/libsherpa-onnx-c-api.a \
build/simulator/lib/libsherpa-onnx-core.a \
build/simulator/lib/libsherpa-onnx-fstfar.a \
build/simulator/lib/libsherpa-onnx-fst.a \
build/simulator/lib/libsherpa-onnx-kaldifst-core.a \
build/simulator/lib/libkaldi-decoder-core.a \
Expand All @@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \
build/os64/lib/libkaldi-native-fbank-core.a \
build/os64/lib/libsherpa-onnx-c-api.a \
build/os64/lib/libsherpa-onnx-core.a \
build/os64/lib/libsherpa-onnx-fstfar.a \
build/os64/lib/libsherpa-onnx-fst.a \
build/os64/lib/libsherpa-onnx-kaldifst-core.a \
build/os64/lib/libkaldi-decoder-core.a \
Expand Down
1 change: 1 addition & 0 deletions build-swift-macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libsherpa-onnx-c-api.a \
./install/lib/libsherpa-onnx-core.a \
./install/lib/libkaldi-native-fbank-core.a \
./install/lib/libsherpa-onnx-fstfar.a \
./install/lib/libsherpa-onnx-fst.a \
./install/lib/libsherpa-onnx-kaldifst-core.a \
./install/lib/libkaldi-decoder-core.a \
Expand Down
2 changes: 1 addition & 1 deletion c-api-examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS := -L ../build/lib
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -framework Foundation
LDFLAGS += -lc++
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
Expand Down
1 change: 1 addition & 0 deletions cmake/cmake_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def get_binaries():
"piper_phonemize.dll",
"sherpa-onnx-c-api.dll",
"sherpa-onnx-core.dll",
"sherpa-onnx-fstfar.lib",
"sherpa-onnx-fst.lib",
"sherpa-onnx-kaldifst-core.lib",
"sherpa-onnx-portaudio.dll",
Expand Down
15 changes: 15 additions & 0 deletions cmake/kaldi-decoder.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,22 @@ function(download_kaldi_decoder)
kaldifst_core
fst
DESTINATION ..)
if(SHERPA_ONNX_ENABLE_TTS)
install(TARGETS
fstfar
DESTINATION ..)
endif()
else()
install(TARGETS
kaldi-decoder-core
kaldifst_core
fst
DESTINATION lib)
if(SHERPA_ONNX_ENABLE_TTS)
install(TARGETS
fstfar
DESTINATION lib)
endif()
endif()

if(WIN32 AND BUILD_SHARED_LIBS)
Expand All @@ -78,6 +88,11 @@ function(download_kaldi_decoder)
kaldifst_core
fst
DESTINATION bin)
if(SHERPA_ONNX_ENABLE_TTS)
install(TARGETS
fstfar
DESTINATION bin)
endif()
endif()
endfunction()

Expand Down
6 changes: 0 additions & 6 deletions cmake/kaldifst.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,7 @@ function(download_kaldifst)
${kaldifst_SOURCE_DIR}/
)

target_include_directories(fst
PUBLIC
${openfst_SOURCE_DIR}/src/include
)

set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core")
set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
endfunction()

download_kaldifst()
27 changes: 14 additions & 13 deletions cmake/openfst.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ function(download_openfst)
include(FetchContent)

set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
set(openfst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/openfst-win-1.6.5.1.tar.gz")
set(openfst_URL2 "https://hub.nuaa.cf/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e")

# If you don't have access to the Internet,
Expand All @@ -31,7 +31,7 @@ function(download_openfst)
set(HAVE_COMPACT OFF CACHE BOOL "" FORCE)
set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE)
set(HAVE_CONST OFF CACHE BOOL "" FORCE)
set(HAVE_FAR OFF CACHE BOOL "" FORCE)
set(HAVE_FAR ON CACHE BOOL "" FORCE)
set(HAVE_GRM OFF CACHE BOOL "" FORCE)
set(HAVE_PDT OFF CACHE BOOL "" FORCE)
set(HAVE_MPDT OFF CACHE BOOL "" FORCE)
Expand Down Expand Up @@ -70,20 +70,21 @@ function(download_openfst)
add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL)
set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE)

# Rename libfst.so.6 to libkaldifst_fst.so.6 to avoid potential conflicts
# when kaldifst is installed.
set_target_properties(fst PROPERTIES OUTPUT_NAME "kaldifst_fst")
# Rename libfst.so.6 to libsherpa-onnx-fst.so.6 to avoid potential conflicts
# when sherpa-onnx is installed.
set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
set_target_properties(fstfar PROPERTIES OUTPUT_NAME "sherpa-onnx-fstfar")

install(TARGETS fst
DESTINATION lib
target_include_directories(fst
PUBLIC
${openfst_SOURCE_DIR}/src/include
)

if(KALDIFST_BUILD_PYTHON)
set_target_properties(fstscript PROPERTIES OUTPUT_NAME "kaldifst_fstscript")
install(TARGETS fstscript
DESTINATION lib
)
endif()
target_include_directories(fstfar
PUBLIC
${openfst_SOURCE_DIR}/src/include
)
# installed in ./kaldi-decoder.cmake
endfunction()

download_openfst()
2 changes: 1 addition & 1 deletion cmake/sherpa-onnx.pc.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ Cflags: -I"${includedir}"
# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
17 changes: 11 additions & 6 deletions dotnet-examples/offline-tts/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Options
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
public string RuleFsts { get; set; }

[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
public string RuleFars { get; set; }

[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; }

Expand Down Expand Up @@ -72,14 +75,15 @@ private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> er
string usage = @"
# vits-aishell3
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
--debug=1 \
--output-filename=./aishell3-66.wav \
Expand Down Expand Up @@ -127,6 +131,7 @@ private static void Run(Options options)
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
config.RuleFsts = options.RuleFsts;
config.RuleFars = options.RuleFars;
config.MaxNumSentences = options.MaxNumSentences;

OfflineTts tts = new OfflineTts(config);
Expand Down
18 changes: 9 additions & 9 deletions dotnet-examples/offline-tts/run-aishell3.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-icefall-zh-aishell3/model.onnx ]; then
# wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2
rm vits-zh-aishell3.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2
fi

dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
--debug=1 \
--output-filename=./aishell3-66.wav \
--text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。"
--text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。长沙长大,去过长白山和长安街。行行出状元。行行,银行行长,行业。"
1 change: 1 addition & 0 deletions go-api-examples/non-streaming-tts/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func main() {
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst")
flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far")
flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size")

flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")
Expand Down
Loading

0 comments on commit a5f8fbc

Please sign in to comment.