-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(component,audio): add TASK_DETECT_ACTIVITY and TASK_SEGMENT (#762)
Because - Voice activity detection (VAD) is an essential feature for audio and video segmentation, enabling further text transcription for RAG embedding and LLM understanding. This commit - adds `TASK_DETECT_ACTIVITY` - adds `TASK_SEGMENT` - refactors the previous Audio operator - removing `TASK_CHUNK_AUDIOS` - refactoring `TASK_SLICE_AUDIO` to `TASK_SEGMENT` - makes the production image use base image `debian:bullseye-slim` because `onnxruntime` doesn't support Alpine apk and we probably don't wanna build it from scratch
- Loading branch information
Showing
30 changed files
with
1,080 additions
and
425 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Go toolchain version used to select the golang base image tag; defaults to 1.22.5. | ||
ARG GOLANG_VERSION=1.22.5 | ||
FROM golang:${GOLANG_VERSION} | ||
FROM golang:${GOLANG_VERSION}-bullseye | ||
|
||
ARG SERVICE_NAME | ||
|
||
|
@@ -11,18 +11,39 @@ ARG TARGETOS TARGETARCH K6_VERSION XK6_VERSION | |
|
||
# Install Python and system tools, create a virtual environment, and install pdfplumber, mistral-common, and tokenizers | ||
RUN apt update && \ | ||
apt install -y python3 python3-venv poppler-utils wv unrtf tidy tesseract-ocr libtesseract-dev libreoffice ffmpeg chromium qpdf && \ | ||
apt install -y python3 python3-venv poppler-utils wv unrtf tidy tesseract-ocr libtesseract-dev libreoffice ffmpeg libsoxr-dev chromium qpdf && \ | ||
python3 -m venv /opt/venv && \ | ||
/opt/venv/bin/pip install pdfplumber mistral-common tokenizers && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# Install ONNX Runtime (latest release). | ||
# The release tag is looked up from the GitHub releases API at build time, so the | ||
# installed version is not pinned and may differ from one build to the next. | ||
# TARGETARCH "arm64" selects the "aarch64" archive; any other value selects "x64". | ||
# The archive is unpacked and moved to ONNXRUNTIME_ROOT_PATH; the downloaded tarball, | ||
# wget, jq, and the apt lists are removed again within the same RUN step. | ||
ENV ONNXRUNTIME_ROOT_PATH=/usr/local/onnxruntime | ||
RUN apt update && \ | ||
apt install -y wget jq && \ | ||
LATEST_VERSION=$(wget -qO- https://api.github.com/repos/microsoft/onnxruntime/releases/latest | jq -r .tag_name) && \ | ||
ONNX_ARCH=$([ "$TARGETARCH" = "arm64" ] && echo "aarch64" || echo "x64") && \ | ||
wget https://github.com/microsoft/onnxruntime/releases/download/${LATEST_VERSION}/onnxruntime-linux-${ONNX_ARCH}-${LATEST_VERSION#v}.tgz && \ | ||
tar -xzf onnxruntime-linux-${ONNX_ARCH}-${LATEST_VERSION#v}.tgz && \ | ||
mv onnxruntime-linux-${ONNX_ARCH}-${LATEST_VERSION#v} ${ONNXRUNTIME_ROOT_PATH} && \ | ||
rm onnxruntime-linux-${ONNX_ARCH}-${LATEST_VERSION#v}.tgz && \ | ||
apt remove -y wget jq && \ | ||
apt autoremove -y && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# Set environment variables so the C compiler and linker can find the ONNX Runtime headers and libraries | ||
ENV C_INCLUDE_PATH=${ONNXRUNTIME_ROOT_PATH}/include | ||
ENV LD_RUN_PATH=${ONNXRUNTIME_ROOT_PATH}/lib | ||
ENV LIBRARY_PATH=${ONNXRUNTIME_ROOT_PATH}/lib | ||
|
||
# tparse | ||
# Installed with `go install` at @latest, targeting TARGETOS/TARGETARCH. | ||
# The Go build cache (/root/.cache/go-build) and /go/pkg are cache mounts, so they are reused across builds. | ||
RUN --mount=target=. --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/go/pkg GOOS=$TARGETOS GOARCH=$TARGETARCH go install github.com/mfridman/tparse@latest | ||
|
||
# air | ||
# Installed with `go install`, targeting TARGETOS/TARGETARCH, using the same cache mounts as the other Go tools. | ||
# NOTE(review): "[email protected]" looks like a scraping artifact that masked a `name@version` suffix; | ||
# restore the real module@version from the original Dockerfile before using this line. | ||
RUN --mount=target=. --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/go/pkg GOOS=$TARGETOS GOARCH=$TARGETARCH go install github.com/cosmtrek/[email protected] | ||
|
||
# k6 | ||
# Installs xk6 at version XK6_VERSION, then uses it to build k6 at version K6_VERSION | ||
# with the github.com/grafana/xk6-sql extension, writing the binary to /usr/bin/k6. | ||
# NOTE(review): "[email protected]" below looks like a scraping artifact that masked a `name@version` suffix; | ||
# confirm the real module@version against the original Dockerfile. | ||
RUN go install go.k6.io/xk6/cmd/xk6@v${XK6_VERSION} | ||
RUN go install github.com/mfridman/[email protected] | ||
RUN xk6 build v${K6_VERSION} --with github.com/grafana/xk6-sql --output /usr/bin/k6 | ||
RUN --mount=target=. --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/go/pkg GOOS=$TARGETOS GOARCH=$TARGETARCH go install go.k6.io/xk6/cmd/xk6@v${XK6_VERSION} | ||
RUN --mount=target=. --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/go/pkg GOOS=$TARGETOS GOARCH=$TARGETARCH xk6 build v${K6_VERSION} --with github.com/grafana/xk6-sql --output /usr/bin/k6 | ||
|
||
# -- set up Go | ||
|
||
|
@@ -38,6 +59,10 @@ ENV GOENV=/go/.config/go/env | |
# required to restore compatibility with those versions. | ||
ENV GODEBUG=tlsrsakex=1 | ||
|
||
# Set up ONNX model and environment variable | ||
# Copies silero_vad.onnx from the build context into the image under /${SERVICE_NAME}/ | ||
# and points ONNX_MODEL_FOLDER_PATH at the folder that contains it. | ||
COPY ./pkg/component/resources/onnx/silero_vad.onnx /${SERVICE_NAME}/pkg/component/resources/onnx/silero_vad.onnx | ||
ENV ONNX_MODEL_FOLDER_PATH=/${SERVICE_NAME}/pkg/component/resources/onnx | ||
|
||
# Run the container as the unprivileged nobody:nogroup user. | ||
USER nobody:nogroup | ||
|
||
# Keep the container alive without doing any work: `tail -f /dev/null` never exits on its own. | ||
ENTRYPOINT ["tail", "-f", "/dev/null"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.