Skip to content

Commit

Permalink
Run some automatic tests with GitHub Actions (SYSTRAN#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaumekln authored Mar 22, 2023
1 parent 52264f2 commit 66efd02
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 2 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Continuous integration workflow: checks code formatting/style and runs the tests.
name: CI

on:
  push:
    branches:
      - master
    tags:
      - v*
  pull_request:
    branches:
      - master

jobs:
  check-code-format:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version as a string instead of parsing a float.
          python-version: "3.8"

      - name: Install module
        run: |
          pip install wheel
          pip install .[dev] --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Check code format with Black
        run: |
          black --check .

      # always() makes this check report even when the Black step failed,
      # matching the Flake8 step below.
      - name: Check imports order with isort
        if: ${{ always() }}
        run: |
          isort --check-only .

      - name: Check code style with Flake8
        if: ${{ always() }}
        run: |
          flake8 .

  run-tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.8
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version as a string instead of parsing a float.
          python-version: "3.8"

      - name: Install module
        run: |
          pip install wheel
          pip install .[dev] --extra-index-url https://download.pytorch.org/whl/cpu

      - name: Run pytest
        run: |
          pytest -v tests/test.py
7 changes: 7 additions & 0 deletions faster_whisper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
from faster_whisper.audio import decode_audio
from faster_whisper.transcribe import WhisperModel
from faster_whisper.utils import format_timestamp

# Names exported by ``from faster_whisper import *``; each one is imported above.
__all__ = [
    "decode_audio",
    "WhisperModel",
    "format_timestamp",
]
2 changes: 1 addition & 1 deletion faster_whisper/feature_extractor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np


# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py
# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py # noqa: E501
class FeatureExtractor:
def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion faster_whisper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ def format_timestamp(
seconds: float,
always_include_hours: bool = False,
decimal_marker: str = ".",
):
) -> str:
assert seconds >= 0, "non-negative timestamp expected"
milliseconds = round(seconds * 1000.0)

Expand Down
9 changes: 9 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Flake8 configuration.
[flake8]
max-line-length = 100
# NOTE(review): E203 and W503 are presumably ignored for compatibility with
# Black's formatting -- confirm against the Black documentation.
ignore =
    E203,
    W503,

# isort configuration.
[isort]
profile=black
lines_between_types=1
7 changes: 7 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ def get_requirements(path):
install_requires=install_requires,
extras_require={
"conversion": conversion_requires,
"dev": conversion_requires
+ [
"black==23.*",
"flake8==6.*",
"isort==5.*",
"pytest==7.*",
],
},
packages=find_packages(),
)
31 changes: 31 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os

import ctranslate2
import pytest


@pytest.fixture
def data_dir():
    """Return the absolute path of the ``data`` directory next to this file."""
    tests_root = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(tests_root, "data")


@pytest.fixture
def jfk_path(data_dir):
    """Return the path of the ``jfk.flac`` sample file inside ``data_dir``."""
    sample_name = "jfk.flac"
    return os.path.join(data_dir, sample_name)


@pytest.fixture(scope="session")
def tiny_model_dir(tmp_path_factory):
    """Convert the "tiny" model into a temporary directory and return its path.

    The fixture is session-scoped, so the conversion is shared by all tests
    of the session.
    """
    base_dir = tmp_path_factory.mktemp("data")
    model_path = str(base_dir / "model")
    convert_model("tiny", model_path)
    return model_path


def convert_model(size, output_dir):
    """Convert the ``openai/whisper-<size>`` model and write it to ``output_dir``.

    The model is loaded as float16 and converted with float16 quantization;
    ``tokenizer.json`` is copied alongside the converted model.
    """
    converter = ctranslate2.converters.TransformersConverter(
        "openai/whisper-%s" % size,
        copy_files=["tokenizer.json"],
        load_as_float16=True,
    )
    converter.convert(output_dir, quantization="float16")
Binary file added tests/data/jfk.flac
Binary file not shown.
25 changes: 25 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from faster_whisper import WhisperModel


def test_transcribe(tiny_model_dir, jfk_path):
    """Transcribe the JFK sample with word timestamps and check the result."""
    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )

    whisper = WhisperModel(tiny_model_dir)
    segment_iter, info = whisper.transcribe(jfk_path, word_timestamps=True)

    # Checks on the transcription info.
    assert info.language == "en"
    assert info.language_probability > 0.9
    assert info.duration == 11

    # Collect the segments into a list so they can be counted and indexed.
    all_segments = list(segment_iter)
    assert len(all_segments) == 1

    only_segment = all_segments[0]
    words = only_segment.words

    assert only_segment.text == expected_text

    # The words must add up to the segment text and share its time bounds.
    assert only_segment.text == "".join([w.word for w in words])
    assert only_segment.start == words[0].start
    assert only_segment.end == words[-1].end

0 comments on commit 66efd02

Please sign in to comment.