From e6fb789c2dbc17d48cc5a6fcd806698e666e4b34 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Tue, 24 Oct 2023 15:57:41 -0500 Subject: [PATCH] wyoming-vosk 1.1.0 --- vosk/Dockerfile | 2 +- vosk/Makefile | 2 +- vosk/README.md | 39 ++++++++++++++++++++++++++++----------- whisper/Dockerfile | 1 + whisper/Makefile | 4 ++-- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/vosk/Dockerfile b/vosk/Dockerfile index acc28d4..2c41218 100644 --- a/vosk/Dockerfile +++ b/vosk/Dockerfile @@ -4,7 +4,7 @@ ARG TARGETVARIANT # Install wyoming-vosk WORKDIR /usr/src -ENV WYOMING_VOSK_VERSION=1.0.0 +ENV WYOMING_VOSK_VERSION=1.1.0 ENV PIP_BREAK_SYSTEM_PACKAGES=1 RUN \ diff --git a/vosk/Makefile b/vosk/Makefile index b59eb46..6f768da 100644 --- a/vosk/Makefile +++ b/vosk/Makefile @@ -1,6 +1,6 @@ .PHONY: local run update -VERSION := 1.0.0 +VERSION := 1.1.0 TAG := rhasspy/wyoming-vosk PLATFORMS := linux/amd64,linux/arm64,linux/arm/v7 HOST := 0.0.0.0 diff --git a/vosk/README.md b/vosk/README.md index 0d8fd30..89cbd8a 100644 --- a/vosk/README.md +++ b/vosk/README.md @@ -4,6 +4,8 @@ This speech-to-text system can run well, even on a Raspberry Pi 3. Using the corrected or limited modes (described below), you can achieve very high accuracy by restricting the sentences that can be spoken. +Models are automatically downloaded from [HuggingFace](https://huggingface.co/rhasspy/vosk-models), but they are originally from [Alpha Cephei](https://alphacephei.com/vosk/models). Please review the license of each model that you use ([model list](https://github.com/rhasspy/wyoming-vosk/blob/master/wyoming_vosk/download.py)). + ## Modes There are three operating modes: @@ -91,13 +93,14 @@ For example: ``` yaml sentences: - - in: lumos + - in: lou mo ss # lumos out: turn on all the lights - - in: nox + - in: knocks # nox out: turn off all the lights ``` lets you say "lumos" to send "turn on all the lights", and "nox" to send "turn off all the lights". 
+Notice that we used words that sound like "lumos" and "nox" because [the vocabulary](https://huggingface.co/rhasspy/vosk-models/tree/main/_vocab) of the default English model is limited (`vosk-model-small-en-us-0.15`). The `in` key can also take a list of sentences, all of them outputting the same `out` string. @@ -178,32 +181,46 @@ You can add as many regular expressions to `no_correct_patterns` as you'd like. ## Allow Unknown -With `--allow-unknown`, you can enable the detection of "unknown" words/phrases outside of the model's vocabulary. Transcripts that are "unknown" will be set to empty strings, indicating that nothing was recognized. When combined with [limited sentences](#limited-sentences), this lets you differentiate between in and out of domain sentences. +With `--allow-unknown`, you can enable the detection of "unknown" words/phrases outside of the model's vocabulary. Transcripts that are "unknown" will be set to empty strings, indicating that nothing was recognized. When combined with [limited sentences](#limited), this lets you differentiate between in and out of domain sentences. + +**NOTE**: Some models do not support unknown words/phrases. See [supported languages](#supported-languages). 
## Supported Languages -* Arabic (`ar`) -* Breton (`br`) * Catalan (`ca`) * Czech (`cz`) + * Does not work with [allow unknown](#allow-unknown) * German (`de`) + * Does not work with [allow unknown](#allow-unknown) * English (`en`) -* Esperanto (`eo`) * Spanish (`es`) + * Does not work with [allow unknown](#allow-unknown) * Persian (`fa`) + * Does not work with [allow unknown](#allow-unknown) * French (`fr`) * Hindi (`hi`) + * Does not work with [allow unknown](#allow-unknown) * Italian (`it`) -* Japanese (`ja`) + * Does not work with [allow unknown](#allow-unknown) * Korean (`ko`) -* Kazakh (`kz`) + * Does not work with [allow unknown](#allow-unknown) * Dutch (`nl`) * Polish (`pl`) + * Does not work with [allow unknown](#allow-unknown) * Portuguese (`pt`) + * Does not work with [allow unknown](#allow-unknown) * Russian (`ru`) + * Does not work with [allow unknown](#allow-unknown) * Swedish (`sv`) -* Tagalog (`tl`) -* Ukrainian (`uk`) -* Uzbek (`uz`) + * Does not work with [limited sentences](#limited) and [allow unknown](#allow-unknown) * Vietnamese (`vn`) * Chinese (`zh`) + +Not tested (no intent support yet in Home Assistant): + +* Breton (`br`) +* Esperanto (`eo`) +* Japanese (`ja`) +* Kazakh (`kz`) +* Tagalog (`tl`) +* Uzbek (`uz`) diff --git a/whisper/Dockerfile b/whisper/Dockerfile index 67f9213..26ff8c4 100644 --- a/whisper/Dockerfile +++ b/whisper/Dockerfile @@ -16,6 +16,7 @@ RUN \ setuptools \ wheel \ && pip3 install --no-cache-dir \ + --extra-index-url https://www.piwheels.org/simple \ "wyoming-faster-whisper==${WHISPER_VERSION}" \ \ && apt-get purge -y --auto-remove \ diff --git a/whisper/Makefile b/whisper/Makefile index f0c19d6..7447f35 100644 --- a/whisper/Makefile +++ b/whisper/Makefile @@ -1,8 +1,8 @@ .PHONY: local run update -VERSION := 1.0.0 +VERSION := 1.1.0 TAG := rhasspy/wyoming-whisper -PLATFORMS := linux/amd64,linux/arm64 +PLATFORMS := linux/amd64,linux/arm64,linux/arm/v7 DATA_DIR := ${HOME}/.local/share/wyoming/whisper MODEL := tiny-int8 
MODEL_LANG := en