Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C API for spoken language identification. #695

Merged
merged 5 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/scripts/test-c-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

set -e

log() {
  # Emit a timestamped message tagged with the call site.
  # Output format: "<date time> (<file>:<line>:<function>) <message>".
  # Backslash escapes in the message are interpreted (echo -e).
  # This helper originates from espnet.
  local caller_file caller_line caller_func
  caller_file=${BASH_SOURCE[1]##*/}   # basename of the script that called log
  caller_line=${BASH_LINENO[0]}       # line number of the log call
  caller_func=${FUNCNAME[1]}          # name of the calling function
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
}

# Fail fast when SLID_EXE is unset or empty. Otherwise the invocation at the
# bottom of this script would expand to nothing, return 0, and the job would
# be reported as passing without having run any test.
: "${SLID_EXE:?SLID_EXE must be set to the executable under test}"

echo "SLID_EXE is $SLID_EXE"
echo "PATH: $PATH"


log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification "
log "------------------------------------------------------------"

# Start from a clean state: drop any archive/directory left by a previous run.
rm -rf sherpa-onnx-whisper-tiny*
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as the archive, so a broken download is reported here and not as a
# confusing tar failure on the next line.
curl -fLS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

# Run the executable under test; it is looked up via PATH and, because of
# `set -e`, a non-zero exit status fails the script.
"$SLID_EXE"

# Remove the extracted model files again.
rm -rf sherpa-onnx-whisper-tiny*
52 changes: 26 additions & 26 deletions .github/scripts/test-spoken-language-identification.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,32 +28,32 @@ ar-arabic.wav
bg-bulgarian.wav
cs-czech.wav
da-danish.wav
de-german.wav
el-greek.wav
en-english.wav
es-spanish.wav
fa-persian.wav
fi-finnish.wav
fr-french.wav
hi-hindi.wav
hr-croatian.wav
id-indonesian.wav
it-italian.wav
ja-japanese.wav
ko-korean.wav
nl-dutch.wav
no-norwegian.wav
po-polish.wav
pt-portuguese.wav
ro-romanian.wav
ru-russian.wav
sk-slovak.wav
sv-swedish.wav
ta-tamil.wav
tl-tagalog.wav
tr-turkish.wav
uk-ukrainian.wav
zh-chinese.wav
# de-german.wav
# el-greek.wav
# en-english.wav
# es-spanish.wav
# fa-persian.wav
# fi-finnish.wav
# fr-french.wav
# hi-hindi.wav
# hr-croatian.wav
# id-indonesian.wav
# it-italian.wav
# ja-japanese.wav
# ko-korean.wav
# nl-dutch.wav
# no-norwegian.wav
# po-polish.wav
# pt-portuguese.wav
# ro-romanian.wav
# ru-russian.wav
# sk-slovak.wav
# sv-swedish.wav
# ta-tamil.wav
# tl-tagalog.wav
# tr-turkish.wav
# uk-ukrainian.wav
# zh-chinese.wav
)

for wav in ${waves[@]}; do
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/android.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ jobs:
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"

rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface

cd huggingface
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-xcframework.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ jobs:
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"

rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface

cd huggingface
Expand Down
12 changes: 10 additions & 2 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,15 @@ jobs:
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
path: build/bin/*

- name: Test spoken language identification
if: matrix.build_type != 'Debug'
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api

.github/scripts/test-c-api.sh

- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
Expand Down Expand Up @@ -243,6 +250,7 @@ jobs:
git config --global user.email "[email protected]"
git config --global user.name "Fangjun Kuang"

rm -rf huggingface
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface

cd huggingface
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx

- name: Test spoken language identification
if: matrix.build_type != 'Debug'
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api

.github/scripts/test-c-api.sh

- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
Expand Down
10 changes: 9 additions & 1 deletion .github/workflows/windows-x64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,15 @@ jobs:

ls -lh ./bin/Release/sherpa-onnx.exe

- name: Test spoken language identification
- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe

.github/scripts/test-c-api.sh

- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ jobs:

ls -lh ./bin/Release/sherpa-onnx.exe

- name: Test spoken language identification (C API)
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe

.github/scripts/test-c-api.sh

# - name: Test spoken language identification
# shell: bash
# run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,4 @@ log
vits-piper-*
vits-coqui-*
vits-mms-*
*.tar.bz2
5 changes: 4 additions & 1 deletion c-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
add_executable(offline-tts-c-api offline-tts-c-api.c)
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)

if(SHERPA_ONNX_HAS_ALSA)
add_subdirectory(./asr-microphone-example)
else()
elseif((UNIX AND NOT APPLE) OR LINUX)
message(WARNING "Not include ./asr-microphone-example since alsa is not available")
endif()
2 changes: 1 addition & 1 deletion c-api-examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS := -L ../build/lib
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -framework Foundation
LDFLAGS += -lc++
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
Expand Down
63 changes: 32 additions & 31 deletions c-api-examples/decode-file-c-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) {
int32_t segment_id = 0;

const char *wav_filename = argv[context.index];
FILE *fp = fopen(wav_filename, "rb");
if (!fp) {
fprintf(stderr, "Failed to open %s\n", wav_filename);
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
if (wave == NULL) {
fprintf(stderr, "Failed to read %s\n", wav_filename);
return -1;
}

// Assume the wave header occupies 44 bytes.
fseek(fp, 44, SEEK_SET);

// simulate streaming

#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz

int16_t buffer[N];
float samples[N];
fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
wave->sample_rate, wave->num_samples,
(float)wave->num_samples / wave->sample_rate);

int32_t k = 0;
while (k < wave->num_samples) {
int32_t start = k;
int32_t end =
(start + N > wave->num_samples) ? wave->num_samples : (start + N);
k += N;

AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
end - start);
while (IsOnlineStreamReady(recognizer, stream)) {
DecodeOnlineStream(recognizer, stream);
}

while (!feof(fp)) {
size_t n = fread((void *)buffer, sizeof(int16_t), N, fp);
if (n > 0) {
for (size_t i = 0; i != n; ++i) {
samples[i] = buffer[i] / 32768.;
}
AcceptWaveform(stream, 16000, samples, n);
while (IsOnlineStreamReady(recognizer, stream)) {
DecodeOnlineStream(recognizer, stream);
}
const SherpaOnnxOnlineRecognizerResult *r =
GetOnlineStreamResult(recognizer, stream);

const SherpaOnnxOnlineRecognizerResult *r =
GetOnlineStreamResult(recognizer, stream);
if (strlen(r->text)) {
SherpaOnnxPrint(display, segment_id, r->text);
}

if (IsEndpoint(recognizer, stream)) {
if (strlen(r->text)) {
SherpaOnnxPrint(display, segment_id, r->text);
++segment_id;
}

if (IsEndpoint(recognizer, stream)) {
if (strlen(r->text)) {
++segment_id;
}
Reset(recognizer, stream);
}

DestroyOnlineRecognizerResult(r);
Reset(recognizer, stream);
}

DestroyOnlineRecognizerResult(r);
}
fclose(fp);

// add some tail padding
float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
AcceptWaveform(stream, 16000, tail_paddings, 4800);
AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);

SherpaOnnxFreeWave(wave);

InputFinished(stream);
while (IsOnlineStreamReady(recognizer, stream)) {
Expand Down
65 changes: 65 additions & 0 deletions c-api-examples/spoken-language-identification-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

// We assume you have pre-downloaded the whisper multi-lingual models
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example command to download the "tiny" whisper model is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

// Example: identify the spoken language of a single wave file with the
// sherpa-onnx C API, using the whisper "tiny" multi-lingual model.
//
// The model and the test wave are read from ./sherpa-onnx-whisper-tiny
// (see the download instructions at the top of this file).
//
// Returns 0 on success and -1 if the identifier cannot be created or the
// wave file cannot be read. The detected language is printed to stderr.
int32_t main() {
  SherpaOnnxSpokenLanguageIdentificationConfig config;

  // Zero the whole struct first so every field not assigned below is 0/NULL.
  memset(&config, 0, sizeof(config));

  config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
  config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
  config.num_threads = 1;
  config.debug = 1;
  config.provider = "cpu";

  const SherpaOnnxSpokenLanguageIdentification *slid =
      SherpaOnnxCreateSpokenLanguageIdentification(&config);
  if (!slid) {
    fprintf(stderr, "Failed to create spoken language identifier\n");
    return -1;
  }

  // You can find more test waves from
  // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
  const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    // Release the identifier created above before bailing out.
    SherpaOnnxDestroySpokenLanguageIdentification(slid);
    return -1;
  }

  SherpaOnnxOfflineStream *stream =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);

  // Feed the complete wave to the stream in one call.
  AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                        wave->num_samples);

  const SherpaOnnxSpokenLanguageIdentificationResult *result =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);

  fprintf(stderr, "wav_filename: %s\n", wav_filename);
  fprintf(stderr, "Detected language: %s\n", result->lang);

  // Release everything that was created above.
  SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
  DestroyOfflineStream(stream);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroySpokenLanguageIdentification(slid);

  return 0;
}
2 changes: 1 addition & 1 deletion dotnet-examples/offline-decode-files/run-hotwords.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi
Expand Down
2 changes: 1 addition & 1 deletion dotnet-examples/offline-decode-files/run-zipformer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi
Expand Down
2 changes: 1 addition & 1 deletion dotnet-examples/online-decode-files/run-transducer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

set -ex
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

Expand Down
Loading
Loading