Skip to content

Commit

Permalink
Support adding punctuations to the speech recogntion result (#761)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Apr 13, 2024
1 parent 0f4705f commit 329fe1a
Show file tree
Hide file tree
Showing 27 changed files with 866 additions and 16 deletions.
41 changes: 41 additions & 0 deletions .github/scripts/test-offline-punctuation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash

set -ex

log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Download model "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
ls -lh $repo

$EXE \
--debug=1 \
--ct-transformer=$repo/model.onnx \
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你"

$EXE \
--debug=1 \
--ct-transformer=$repo/model.onnx \
"我们都是木头人不会说话不会动"

$EXE \
--debug=1 \
--ct-transformer=$repo/model.onnx \
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"

rm -rf $repo
10 changes: 10 additions & 0 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand All @@ -34,6 +35,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand Down Expand Up @@ -126,6 +128,14 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: build/bin/*

- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-offline-punctuation.sh
- name: Test C API
shell: bash
run: |
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand All @@ -33,6 +34,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand Down Expand Up @@ -105,6 +107,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-offline-punctuation.sh
- name: Test C API
shell: bash
run: |
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/windows-x64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand All @@ -30,6 +31,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand Down Expand Up @@ -72,6 +74,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-punctuation.exe
.github/scripts/test-offline-punctuation.sh
- name: Test C API
shell: bash
run: |
Expand All @@ -82,7 +92,6 @@ jobs:
.github/scripts/test-c-api.sh
- name: Test Audio tagging
shell: bash
run: |
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/windows-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand All @@ -30,6 +31,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
Expand Down Expand Up @@ -72,6 +74,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-punctuation.exe
.github/scripts/test-offline-punctuation.sh
- name: Test spoken language identification (C API)
shell: bash
run: |
Expand Down
3 changes: 2 additions & 1 deletion cmake/cmake_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,15 @@ def enable_alsa():
def get_binaries():
binaries = [
"sherpa-onnx",
"sherpa-onnx-offline-audio-tagging",
"sherpa-onnx-keyword-spotter",
"sherpa-onnx-microphone",
"sherpa-onnx-microphone-offline",
"sherpa-onnx-microphone-offline-audio-tagging",
"sherpa-onnx-microphone-offline-speaker-identification",
"sherpa-onnx-offline",
"sherpa-onnx-offline-audio-tagging",
"sherpa-onnx-offline-language-identification",
"sherpa-onnx-offline-punctuation",
"sherpa-onnx-offline-tts",
"sherpa-onnx-offline-tts-play",
"sherpa-onnx-offline-websocket-server",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,11 @@ def main():
vad_config.silero_vad.min_silence_duration = 0.25
vad_config.silero_vad.min_speech_duration = 0.25
vad_config.sample_rate = g_sample_rate
if not vad_config.validate():
raise ValueError("Errors in vad config")

window_size = vad_config.silero_vad.window_size

vad = sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=100)

samples_per_read = int(0.1 * g_sample_rate) # 0.1 second = 100 ms
Expand Down
22 changes: 16 additions & 6 deletions sherpa-onnx/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@ list(APPEND sources
offline-zipformer-audio-tagging-model.cc
)

# punctuation
list(APPEND sources
offline-ct-transformer-model.cc
offline-punctuation-impl.cc
offline-punctuation-model-config.cc
offline-punctuation.cc
)

if(SHERPA_ONNX_ENABLE_TTS)
list(APPEND sources
lexicon.cc
Expand Down Expand Up @@ -201,9 +209,10 @@ if(SHERPA_ONNX_ENABLE_BINARY)
add_executable(sherpa-onnx sherpa-onnx.cc)
add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc)
add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc)
add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc)
add_executable(sherpa-onnx-offline-language-identification sherpa-onnx-offline-language-identification.cc)
add_executable(sherpa-onnx-offline-audio-tagging sherpa-onnx-offline-audio-tagging.cc)
add_executable(sherpa-onnx-offline-language-identification sherpa-onnx-offline-language-identification.cc)
add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc)
add_executable(sherpa-onnx-offline-punctuation sherpa-onnx-offline-punctuation.cc)

if(SHERPA_ONNX_ENABLE_TTS)
add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc)
Expand All @@ -213,9 +222,10 @@ if(SHERPA_ONNX_ENABLE_BINARY)
sherpa-onnx
sherpa-onnx-keyword-spotter
sherpa-onnx-offline
sherpa-onnx-offline-parallel
sherpa-onnx-offline-language-identification
sherpa-onnx-offline-audio-tagging
sherpa-onnx-offline-language-identification
sherpa-onnx-offline-parallel
sherpa-onnx-offline-punctuation
)
if(SHERPA_ONNX_ENABLE_TTS)
list(APPEND main_exes
Expand Down Expand Up @@ -260,11 +270,11 @@ endif()

if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc)
add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc)
add_executable(sherpa-onnx-alsa-offline-audio-tagging sherpa-onnx-alsa-offline-audio-tagging.cc alsa.cc)
add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc)
add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc)
add_executable(sherpa-onnx-vad-alsa sherpa-onnx-vad-alsa.cc alsa.cc)
add_executable(sherpa-onnx-alsa-offline-audio-tagging sherpa-onnx-alsa-offline-audio-tagging.cc alsa.cc)


if(SHERPA_ONNX_ENABLE_TTS)
Expand Down
5 changes: 0 additions & 5 deletions sherpa-onnx/csrc/lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,6 @@ static std::vector<std::string> ProcessHeteronyms(
return ans;
}

static void ToLowerCase(std::string *in_out) {
std::transform(in_out->begin(), in_out->end(), in_out->begin(),
[](unsigned char c) { return std::tolower(c); });
}

// Note: We don't use SymbolTable here since tokens may contain a blank
// in the first column
static std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
Expand Down
18 changes: 18 additions & 0 deletions sherpa-onnx/csrc/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,24 @@
} \
} while (0)

// read a vector of strings separated by sep
#define SHERPA_ONNX_READ_META_DATA_VEC_STRING_SEP(dst, src_key, sep) \
do { \
auto value = \
meta_data.LookupCustomMetadataMapAllocated(src_key, allocator); \
if (!value) { \
SHERPA_ONNX_LOGE("%s does not exist in the metadata", src_key); \
exit(-1); \
} \
SplitStringToVector(value.get(), sep, false, &dst); \
\
if (dst.empty()) { \
SHERPA_ONNX_LOGE("Invalid value %s for %s. Empty vector!", value.get(), \
src_key); \
exit(-1); \
} \
} while (0)

// Read a string
#define SHERPA_ONNX_READ_META_DATA_STR(dst, src_key) \
do { \
Expand Down
29 changes: 29 additions & 0 deletions sherpa-onnx/csrc/offline-ct-transformer-model-meta-data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// sherpa-onnx/csrc/offline-ct-transformer-model-meta_data.h
//
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

struct OfflineCtTransformerModelMetaData {
std::unordered_map<std::string, int32_t> token2id;
std::unordered_map<std::string, int32_t> punct2id;
std::vector<std::string> id2punct;

int32_t unk_id;
int32_t dot_id;
int32_t comma_id;
int32_t quest_id;
int32_t pause_id;
int32_t underline_id;
int32_t num_punctuations;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_META_DATA_H_
Loading

0 comments on commit 329fe1a

Please sign in to comment.