Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C and C++ API for Moonshine models #1476

Merged
merged 4 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .github/workflows/c-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,80 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
fi

# CI step: compile the VAD + Whisper C API example against the installed
# headers/libraries under ./build/install, download the models and test wave
# it needs, run it, and remove the downloaded files afterwards.
- name: Test vad + Whisper tiny.en
shell: bash
run: |
# Build the example and link it with sherpa-onnx-c-api and onnxruntime.
gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-c-api \
-l onnxruntime

# Now download models
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

# Make the shared libraries in build/install/lib visible to the dynamic
# loader (LD_LIBRARY_PATH on Linux, DYLD_LIBRARY_PATH on macOS).
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

./vad-whisper-c-api

# Clean up the extracted model directory, the VAD model and the test wave.
rm -rf sherpa-onnx-*
rm -rf *.onnx
rm *.wav

# CI step: compile the VAD + Moonshine C API example against the installed
# headers/libraries under ./build/install, download the models and test wave
# it needs, run it, and remove the downloaded files afterwards.
- name: Test vad + Moonshine
shell: bash
run: |
# Build the example and link it with sherpa-onnx-c-api and onnxruntime.
gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-c-api \
-l onnxruntime

# Now download models
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

# Make the shared libraries in build/install/lib visible to the dynamic
# loader (LD_LIBRARY_PATH on Linux, DYLD_LIBRARY_PATH on macOS).
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

./vad-moonshine-c-api

# Clean up the extracted model directory, the VAD model and the test wave.
rm -rf sherpa-onnx-*
rm -rf *.onnx
rm *.wav

# CI step: compile the Moonshine C API example against the installed
# headers/libraries under ./build/install, download the int8 Moonshine tiny
# model archive, run the example, and remove the extracted model directory.
- name: Test Moonshine
shell: bash
run: |
# Build the example and link it with sherpa-onnx-c-api and onnxruntime.
gcc -o moonshine-c-api ./c-api-examples/moonshine-c-api.c \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-c-api \
-l onnxruntime

# Download and unpack the model; the archive itself is not needed afterwards.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

# Make the shared libraries in build/install/lib visible to the dynamic
# loader (LD_LIBRARY_PATH on Linux, DYLD_LIBRARY_PATH on macOS).
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

./moonshine-c-api

# Clean up the extracted model directory.
rm -rf sherpa-onnx-*

- name: Test ffmpeg
if: matrix.os == 'macos-latest'
shell: bash
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/cxx-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,28 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
fi

# CI step: compile the Moonshine C++ API example (C++17) against the installed
# headers/libraries under ./build/install, download the int8 Moonshine tiny
# model archive, run the example, and remove the model directory and binary.
- name: Test Moonshine tiny
shell: bash
run: |
# Build the example; it links the C++ wrapper, the C API and onnxruntime.
g++ -std=c++17 -o moonshine-cxx-api ./cxx-api-examples/moonshine-cxx-api.cc \
-I ./build/install/include \
-L ./build/install/lib/ \
-l sherpa-onnx-cxx-api \
-l sherpa-onnx-c-api \
-l onnxruntime

# Download and unpack the model; the archive itself is not needed afterwards.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

# Make the shared libraries in build/install/lib visible to the dynamic
# loader (LD_LIBRARY_PATH on Linux, DYLD_LIBRARY_PATH on macOS).
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

./moonshine-cxx-api

# Clean up the extracted model directory and the compiled example.
rm -rf sherpa-onnx-*
rm ./moonshine-cxx-api

- name: Test whisper
shell: bash
run: |
Expand Down
9 changes: 9 additions & 0 deletions c-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ target_link_libraries(whisper-c-api sherpa-onnx-c-api)
# Each example below is a standalone executable built from a single C source
# file in this directory and linked against sherpa-onnx-c-api.
add_executable(sense-voice-c-api sense-voice-c-api.c)
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)

# Moonshine example; see moonshine-c-api.c.
add_executable(moonshine-c-api moonshine-c-api.c)
target_link_libraries(moonshine-c-api sherpa-onnx-c-api)

add_executable(zipformer-c-api zipformer-c-api.c)
target_link_libraries(zipformer-c-api sherpa-onnx-c-api)

Expand All @@ -53,6 +56,12 @@ target_link_libraries(telespeech-c-api sherpa-onnx-c-api)
# VAD-based examples: one executable per C source file, each linked against
# sherpa-onnx-c-api.
add_executable(vad-sense-voice-c-api vad-sense-voice-c-api.c)
target_link_libraries(vad-sense-voice-c-api sherpa-onnx-c-api)

add_executable(vad-whisper-c-api vad-whisper-c-api.c)
target_link_libraries(vad-whisper-c-api sherpa-onnx-c-api)

# VAD + Moonshine example; see vad-moonshine-c-api.c.
add_executable(vad-moonshine-c-api vad-moonshine-c-api.c)
target_link_libraries(vad-moonshine-c-api sherpa-onnx-c-api)

# The target name is long, so the source file is listed on its own line.
add_executable(streaming-zipformer-buffered-tokens-hotwords-c-api
streaming-zipformer-buffered-tokens-hotwords-c-api.c)
target_link_libraries(streaming-zipformer-buffered-tokens-hotwords-c-api sherpa-onnx-c-api)
Expand Down
83 changes: 83 additions & 0 deletions c-api-examples/moonshine-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// c-api-examples/moonshine-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation

//
// This file demonstrates how to use Moonshine tiny with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

// Decodes one test wave file with the int8 Moonshine tiny model and prints the
// recognized text to stderr.
//
// All paths are relative to the current working directory; the model
// directory is expected to have been downloaded and extracted beforehand
// (see the commands in the header comment of this file).
//
// Returns 0 on success and -1 if the wave file cannot be read or the
// recognizer cannot be created.
int32_t main() {
  const char *wav_path = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
  const char *preprocessor_path =
      "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
  const char *encoder_path =
      "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
  const char *uncached_decoder_path =
      "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
  const char *cached_decoder_path =
      "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
  const char *tokens_path = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

  // Load the audio first so that a missing file is reported before any model
  // is loaded.
  const SherpaOnnxWave *audio = SherpaOnnxReadWave(wav_path);
  if (!audio) {
    fprintf(stderr, "Failed to read %s\n", wav_path);
    return -1;
  }

  // Zero the whole recognizer config, then fill in only the fields this
  // example needs. The embedded model config is populated in place.
  SherpaOnnxOfflineRecognizerConfig config;
  memset(&config, 0, sizeof(config));
  config.decoding_method = "greedy_search";

  config.model_config.debug = 1;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";
  config.model_config.tokens = tokens_path;

  config.model_config.moonshine.preprocessor = preprocessor_path;
  config.model_config.moonshine.encoder = encoder_path;
  config.model_config.moonshine.uncached_decoder = uncached_decoder_path;
  config.model_config.moonshine.cached_decoder = cached_decoder_path;

  const SherpaOnnxOfflineRecognizer *asr =
      SherpaOnnxCreateOfflineRecognizer(&config);
  if (!asr) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(audio);
    return -1;
  }

  // Feed the complete waveform to a single stream and decode it in one go.
  const SherpaOnnxOfflineStream *s = SherpaOnnxCreateOfflineStream(asr);
  SherpaOnnxAcceptWaveformOffline(s, audio->sample_rate, audio->samples,
                                  audio->num_samples);
  SherpaOnnxDecodeOfflineStream(asr, s);

  const SherpaOnnxOfflineRecognizerResult *r =
      SherpaOnnxGetOfflineStreamResult(s);
  fprintf(stderr, "Decoded text: %s\n", r->text);

  // Release everything in the reverse order of creation.
  SherpaOnnxDestroyOfflineRecognizerResult(r);
  SherpaOnnxDestroyOfflineStream(s);
  SherpaOnnxDestroyOfflineRecognizer(asr);
  SherpaOnnxFreeWave(audio);

  return 0;
}
171 changes: 171 additions & 0 deletions c-api-examples/vad-moonshine-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// c-api-examples/vad-moonshine-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation

//
// This file demonstrates how to use VAD + Moonshine with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

// Decodes every speech segment currently queued in `vad` and prints one line
// per segment to stderr in the form "<start> -- <stop>: <text>", with times
// in seconds. Each segment is destroyed and popped from the VAD once it has
// been decoded, so the VAD queue is empty when this function returns.
//
// @param recognizer  Recognizer used to create one offline stream per segment.
// @param vad         Voice activity detector whose queued segments are drained.
// @param sample_rate Sample rate of the audio that was fed to the VAD. It is
//                    passed to the recognizer and used to convert sample
//                    offsets to seconds.
static void DecodeQueuedSegments(const SherpaOnnxOfflineRecognizer *recognizer,
                                 SherpaOnnxVoiceActivityDetector *vad,
                                 int32_t sample_rate) {
  while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
    const SherpaOnnxSpeechSegment *segment =
        SherpaOnnxVoiceActivityDetectorFront(vad);

    const SherpaOnnxOfflineStream *stream =
        SherpaOnnxCreateOfflineStream(recognizer);

    SherpaOnnxAcceptWaveformOffline(stream, sample_rate, segment->samples,
                                    segment->n);

    SherpaOnnxDecodeOfflineStream(recognizer, stream);

    const SherpaOnnxOfflineRecognizerResult *result =
        SherpaOnnxGetOfflineStreamResult(stream);

    // segment->start and segment->n are counted in samples.
    float start = segment->start / (float)sample_rate;
    float duration = segment->n / (float)sample_rate;
    float stop = start + duration;

    fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);

    SherpaOnnxDestroyOfflineRecognizerResult(result);
    SherpaOnnxDestroyOfflineStream(stream);

    SherpaOnnxDestroySpeechSegment(segment);
    SherpaOnnxVoiceActivityDetectorPop(vad);
  }
}

// Splits ./Obama.wav into speech segments with silero VAD and transcribes each
// segment with the int8 Moonshine tiny model, printing the results to stderr.
//
// All paths are relative to the current working directory; the files are
// expected to have been downloaded beforehand (see the commands in the header
// comment of this file).
//
// Returns 0 on success and -1 if the wave file cannot be read, its sample
// rate is not 16000, or the recognizer or the VAD cannot be created.
int32_t main() {
  const char *wav_filename = "./Obama.wav";
  const char *vad_filename = "./silero_vad.onnx";

  const char *preprocessor =
      "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
  const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
  const char *uncached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
  const char *cached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
  const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // The VAD below is configured for 16 kHz input and no resampling is done
  // here, so any other sample rate is rejected.
  if (wave->sample_rate != 16000) {
    fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
            wave->sample_rate);
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 0;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = "cpu";
  offline_model_config.tokens = tokens;
  offline_model_config.moonshine.preprocessor = preprocessor;
  offline_model_config.moonshine.encoder = encoder;
  offline_model_config.moonshine.uncached_decoder = uncached_decoder;
  offline_model_config.moonshine.cached_decoder = cached_decoder;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  // VAD config
  SherpaOnnxVadModelConfig vadConfig;
  memset(&vadConfig, 0, sizeof(vadConfig));
  vadConfig.silero_vad.model = vad_filename;
  vadConfig.silero_vad.threshold = 0.5;
  vadConfig.silero_vad.min_silence_duration = 0.5;
  vadConfig.silero_vad.min_speech_duration = 0.5;
  vadConfig.silero_vad.max_speech_duration = 10;
  vadConfig.silero_vad.window_size = 512;
  vadConfig.sample_rate = 16000;
  vadConfig.num_threads = 1;
  vadConfig.debug = 1;

  SherpaOnnxVoiceActivityDetector *vad =
      SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);

  if (vad == NULL) {
    // Report the VAD config here, not the recognizer config: the recognizer
    // was already created successfully above.
    fprintf(stderr, "Please check your vad config!\n");
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOfflineRecognizer(recognizer);
    return -1;
  }

  int32_t window_size = vadConfig.silero_vad.window_size;
  int32_t i = 0;

  // Feed the wave to the VAD one full window at a time. `<=` makes sure the
  // last full window is also consumed when the number of samples is an exact
  // multiple of the window size; a trailing partial window is not fed.
  while (i + window_size <= wave->num_samples) {
    SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
                                                  window_size);
    i += window_size;

    DecodeQueuedSegments(recognizer, vad, wave->sample_rate);
  }

  // Flush the VAD and decode whatever segments that produces.
  SherpaOnnxVoiceActivityDetectorFlush(vad);
  DecodeQueuedSegments(recognizer, vad, wave->sample_rate);

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxDestroyVoiceActivityDetector(vad);
  SherpaOnnxFreeWave(wave);

  return 0;
}
1 change: 1 addition & 0 deletions c-api-examples/vad-sense-voice-c-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ int32_t main() {
vadConfig.silero_vad.threshold = 0.5;
vadConfig.silero_vad.min_silence_duration = 0.5;
vadConfig.silero_vad.min_speech_duration = 0.5;
vadConfig.silero_vad.max_speech_duration = 5;
vadConfig.silero_vad.window_size = 512;
vadConfig.sample_rate = 16000;
vadConfig.num_threads = 1;
Expand Down
Loading
Loading