From 87899c26fe7e64d516080292de09f5a8b8b09359 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 8 Mar 2024 12:35:05 +0800 Subject: [PATCH] Add Python ASR example with alsa (#324) --- CMakeLists.txt | 3 +- ...microphone-with-endpoint-detection-alsa.py | 111 ++++++++++++++++++ sherpa-ncnn/csrc/alsa.cc | 2 +- sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc | 2 +- sherpa-ncnn/csrc/stream.cc | 1 + sherpa-ncnn/python/csrc/CMakeLists.txt | 13 ++ sherpa-ncnn/python/csrc/alsa.cc | 30 +++++ sherpa-ncnn/python/csrc/alsa.h | 16 +++ sherpa-ncnn/python/csrc/faked-alsa.cc | 47 ++++++++ sherpa-ncnn/python/csrc/sherpa-ncnn.cc | 3 + sherpa-ncnn/python/sherpa_ncnn/__init__.py | 3 +- 11 files changed, 227 insertions(+), 4 deletions(-) create mode 100755 python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py create mode 100644 sherpa-ncnn/python/csrc/alsa.cc create mode 100644 sherpa-ncnn/python/csrc/alsa.h create mode 100644 sherpa-ncnn/python/csrc/faked-alsa.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index d21a183f..66fe3d14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-ncnn) -set(SHERPA_NCNN_VERSION "2.1.9") +set(SHERPA_NCNN_VERSION "2.1.10") # Disable warning about # @@ -106,6 +106,7 @@ if(SHERPA_NCNN_ENABLE_BINARY AND UNIX AND NOT APPLE) include(CheckIncludeFileCXX) check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA) if(SHERPA_NCNN_HAS_ALSA) + message(STATUS "With Alsa") add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1) elseif(UNIX AND NOT APPLE) message(WARNING "\ diff --git a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py new file mode 100755 index 00000000..54a3ae3e --- /dev/null +++ b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 + +# Real-time speech 
recognition from a microphone with sherpa-ncnn Python API +# with endpoint detection. +# +# Note: This script uses ALSA and works only on Linux systems, especially +# for embedding Linux systems and for running Linux on Windows using WSL. +# +# Please refer to +# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html +# to download pre-trained models + +import argparse +import sys + +import sherpa_ncnn + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--device-name", + type=str, + required=True, + help=""" +The device name specifies which microphone to use in case there are several +on your system. You can use + + arecord -l + +to find all available microphones on your computer. For instance, if it outputs + +**** List of CAPTURE Hardware Devices **** +card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] + Subdevices: 1/1 + Subdevice #0: subdevice #0 + +and if you want to select card 3 and the device 0 on that card, please use: + + plughw:3,0 + +as the device_name. + """, + ) + + return parser.parse_args() + + +def create_recognizer(): + # Please replace the model files if needed. + # See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html + # for download links. 
+ recognizer = sherpa_ncnn.Recognizer( + tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt", + encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param", + encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin", + decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param", + decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin", + joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param", + joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin", + num_threads=4, + decoding_method="modified_beam_search", + enable_endpoint_detection=True, + rule1_min_trailing_silence=2.4, + rule2_min_trailing_silence=1.2, + rule3_min_utterance_length=300, + hotwords_file="", + hotwords_score=1.5, + ) + return recognizer + + +def main(): + args = get_args() + device_name = args.device_name + print(f"device_name: {device_name}") + alsa = sherpa_ncnn.Alsa(device_name) + + recognizer = create_recognizer() + print("Started! Please speak") + sample_rate = recognizer.sample_rate + samples_per_read = int(0.1 * sample_rate) # 0.1 second = 100 ms + last_result = "" + segment_id = 0 + + while True: + samples = alsa.read(samples_per_read) # a blocking read + recognizer.accept_waveform(sample_rate, samples) + + is_endpoint = recognizer.is_endpoint + + result = recognizer.text + if result and (last_result != result): + last_result = result + print("\r{}:{}".format(segment_id, result), end="", flush=True) + + if is_endpoint: + if result: + print("\r{}:{}".format(segment_id, result), flush=True) + segment_id += 1 + recognizer.reset() + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\nCaught Ctrl + C. 
Exiting") diff --git a/sherpa-ncnn/csrc/alsa.cc b/sherpa-ncnn/csrc/alsa.cc index dac9fd17..45d2f02b 100644 --- a/sherpa-ncnn/csrc/alsa.cc +++ b/sherpa-ncnn/csrc/alsa.cc @@ -62,7 +62,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] and if you want to select card 3 and the device 0 on that card, please use: - hw:3,0 + plughw:3,0 )"; diff --git a/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc b/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc index fd29f684..77908b0d 100644 --- a/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc +++ b/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc @@ -67,7 +67,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] and if you want to select card 3 and the device 0 on that card, please use: - hw:3,0 + plughw:3,0 as the device_name. )usage"; diff --git a/sherpa-ncnn/csrc/stream.cc b/sherpa-ncnn/csrc/stream.cc index 9e6fa4d9..4cc4f704 100644 --- a/sherpa-ncnn/csrc/stream.cc +++ b/sherpa-ncnn/csrc/stream.cc @@ -19,6 +19,7 @@ #include "sherpa-ncnn/csrc/stream.h" #include +#include namespace sherpa_ncnn { diff --git a/sherpa-ncnn/python/csrc/CMakeLists.txt b/sherpa-ncnn/python/csrc/CMakeLists.txt index d0e86bd2..d1ba9202 100644 --- a/sherpa-ncnn/python/csrc/CMakeLists.txt +++ b/sherpa-ncnn/python/csrc/CMakeLists.txt @@ -11,6 +11,12 @@ set(srcs stream.cc ) +if(SHERPA_NCNN_HAS_ALSA) + list(APPEND srcs ${CMAKE_SOURCE_DIR}/sherpa-ncnn/csrc/alsa.cc alsa.cc) +else() + list(APPEND srcs faked-alsa.cc) +endif() + pybind11_add_module(_sherpa_ncnn ${srcs}) target_link_libraries(_sherpa_ncnn PRIVATE sherpa-ncnn-core) @@ -28,6 +34,13 @@ if(NOT WIN32) target_link_libraries(_sherpa_ncnn PRIVATE "-Wl,-rpath,${SHERPA_NCNN_RPATH_ORIGIN}/sherpa_ncnn/lib") endif() +if(SHERPA_NCNN_HAS_ALSA) + if(DEFINED ENV{SHERPA_NCNN_ALSA_LIB_DIR}) + target_link_libraries(_sherpa_ncnn PRIVATE -L$ENV{SHERPA_NCNN_ALSA_LIB_DIR} -lasound) + else() + target_link_libraries(_sherpa_ncnn PRIVATE asound) + endif() +endif() install(TARGETS _sherpa_ncnn DESTINATION ../ diff --git 
a/sherpa-ncnn/python/csrc/alsa.cc b/sherpa-ncnn/python/csrc/alsa.cc new file mode 100644 index 00000000..3a2bd020 --- /dev/null +++ b/sherpa-ncnn/python/csrc/alsa.cc @@ -0,0 +1,30 @@ +// sherpa-ncnn/python/csrc/alsa.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include "sherpa-ncnn/python/csrc/alsa.h" + +#include <vector> + +#include "sherpa-ncnn/csrc/alsa.h" + +namespace sherpa_ncnn { + +void PybindAlsa(py::module *m) { + using PyClass = Alsa; + py::class_<PyClass>(*m, "Alsa") + .def(py::init<const char *>(), py::arg("device_name"), + py::call_guard<py::gil_scoped_release>()) + .def( + "read", + [](PyClass &self, int32_t num_samples) -> std::vector<float> { + return self.Read(num_samples); + }, + py::arg("num_samples"), py::call_guard<py::gil_scoped_release>()) + .def_property_readonly("expected_sample_rate", + &PyClass::GetExpectedSampleRate) + .def_property_readonly("actual_sample_rate", + &PyClass::GetActualSampleRate); +} + +} // namespace sherpa_ncnn diff --git a/sherpa-ncnn/python/csrc/alsa.h b/sherpa-ncnn/python/csrc/alsa.h new file mode 100644 index 00000000..3c641de0 --- /dev/null +++ b/sherpa-ncnn/python/csrc/alsa.h @@ -0,0 +1,16 @@ +// sherpa-ncnn/python/csrc/alsa.h +// +// Copyright (c) 2024 Xiaomi Corporation + +#ifndef SHERPA_NCNN_PYTHON_CSRC_ALSA_H_ +#define SHERPA_NCNN_PYTHON_CSRC_ALSA_H_ + +#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h" + +namespace sherpa_ncnn { + +void PybindAlsa(py::module *m); + +} // namespace sherpa_ncnn + +#endif // SHERPA_NCNN_PYTHON_CSRC_ALSA_H_ diff --git a/sherpa-ncnn/python/csrc/faked-alsa.cc b/sherpa-ncnn/python/csrc/faked-alsa.cc new file mode 100644 index 00000000..3cd1bbf2 --- /dev/null +++ b/sherpa-ncnn/python/csrc/faked-alsa.cc @@ -0,0 +1,47 @@ +// sherpa-ncnn/python/csrc/faked-alsa.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include "sherpa-ncnn/csrc/macros.h" +#include "sherpa-ncnn/python/csrc/alsa.h" + +namespace sherpa_ncnn { + +class FakedAlsa { + public: + explicit FakedAlsa(const char *) { + SHERPA_NCNN_LOGE("This function is for Linux only."); +#if (SHERPA_NCNN_ENABLE_ALSA
== 0) && (defined(__unix__) || defined(__unix)) + SHERPA_NCNN_LOGE(R"doc( +sherpa-ncnn is compiled without alsa support. To enable that, please run + (1) sudo apt-get install alsa-utils libasound2-dev + (2) rebuild sherpa-ncnn +)doc"); +#endif + exit(-1); + } + + std::vector<float> Read(int32_t) const { return {}; } + int32_t GetExpectedSampleRate() const { return -1; } + int32_t GetActualSampleRate() const { return -1; } +}; + +void PybindAlsa(py::module *m) { + using PyClass = FakedAlsa; + py::class_<PyClass>(*m, "Alsa") + .def(py::init<const char *>(), py::arg("device_name")) + .def( + "read", + [](PyClass &self, int32_t num_samples) -> std::vector<float> { + return self.Read(num_samples); + }, + py::arg("num_samples"), py::call_guard<py::gil_scoped_release>()) + .def_property_readonly("expected_sample_rate", + &PyClass::GetExpectedSampleRate) + .def_property_readonly("actual_sample_rate", + &PyClass::GetActualSampleRate); +} + +} // namespace sherpa_ncnn + +#endif // SHERPA_NCNN_PYTHON_CSRC_FAKED_ALSA_H_ diff --git a/sherpa-ncnn/python/csrc/sherpa-ncnn.cc b/sherpa-ncnn/python/csrc/sherpa-ncnn.cc index 48b7baf4..92ebe5cb 100644 --- a/sherpa-ncnn/python/csrc/sherpa-ncnn.cc +++ b/sherpa-ncnn/python/csrc/sherpa-ncnn.cc @@ -18,6 +18,7 @@ #include "sherpa-ncnn/python/csrc/sherpa-ncnn.h" +#include "sherpa-ncnn/python/csrc/alsa.h" #include "sherpa-ncnn/python/csrc/decoder.h" #include "sherpa-ncnn/python/csrc/display.h" #include "sherpa-ncnn/python/csrc/endpoint.h" @@ -39,6 +40,8 @@ PYBIND11_MODULE(_sherpa_ncnn, m) { PybindRecognizer(&m); PybindDisplay(&m); + + PybindAlsa(&m); } } // namespace sherpa_ncnn diff --git a/sherpa-ncnn/python/sherpa_ncnn/__init__.py b/sherpa-ncnn/python/sherpa_ncnn/__init__.py index b247eb3c..167165ad 100644 --- a/sherpa-ncnn/python/sherpa_ncnn/__init__.py +++ b/sherpa-ncnn/python/sherpa_ncnn/__init__.py @@ -1,2 +1,3 @@ +from _sherpa_ncnn import Alsa, Display + from .recognizer import Recognizer -from _sherpa_ncnn import Display