Skip to content

Commit

Permalink
Update to faster-whisper 1.1.1 and update param name from vad_onset to vad_threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
jordimas committed Jan 1, 2025
1 parent 1a4b8ee commit 3ac4fe5
Show file tree
Hide file tree
Showing 6 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ run:

install-dependencies-e2e-tests:
echo ctranslate2==4.0.0 > constraints.txt
pip install --force-reinstall -c constraints.txt faster-whisper==1.1.0
pip install --force-reinstall -c constraints.txt faster-whisper==1.1.1
echo numpy==1.26 > constraints.txt
pip install --force-reinstall -c constraints.txt pyannote.audio==3.3.1

Expand Down
2 changes: 1 addition & 1 deletion e2e-tests/testcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def test_options_vad(self):
with tempfile.TemporaryDirectory() as directory:
_file = "gossos"
cmd = (
f"cd {directory} && whisper-ctranslate2 {path}/{_file}.mp3 --device cpu --compute_type float32 --vad_filter True --vad_onset 0.5"
f"cd {directory} && whisper-ctranslate2 {path}/{_file}.mp3 --device cpu --compute_type float32 --vad_filter True --vad_threshold 0.5"
f" --vad_min_speech_duration_ms 2000 --vad_max_speech_duration_s 50000 --output_dir {directory}"
)
os.system(cmd)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy
faster-whisper>=1.1.0
faster-whisper>=1.1.1
ctranslate2
tqdm
sounddevice
Expand Down
4 changes: 2 additions & 2 deletions src/whisper_ctranslate2/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,10 +381,10 @@ def read_command_line():
)

vad_args.add_argument(
"--vad_onset",
"--vad_threshold",
type=float,
default=None,
help="when `vad_filter` is enabled, probabilities above this value are considered as speech. This parameter was called `vad_threshold` before",
help="when `vad_filter` is enabled, probabilities above this value are considered as speech.",
)

vad_args.add_argument(
Expand Down
6 changes: 3 additions & 3 deletions src/whisper_ctranslate2/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class TranscriptionOptions(NamedTuple):
append_punctuations: str
hallucination_silence_threshold: Optional[float]
vad_filter: bool
vad_onset: Optional[float]
vad_threshold: Optional[float]
vad_min_speech_duration_ms: Optional[int]
vad_max_speech_duration_s: Optional[int]
vad_min_silence_duration_ms: Optional[int]
Expand Down Expand Up @@ -84,8 +84,8 @@ def _get_colored_text(self, words):
def _get_vad_parameters_dictionary(self, options):
vad_parameters = {}

if options.vad_onset:
vad_parameters["onset"] = options.vad_onset
if options.vad_threshold:
vad_parameters["threshold"] = options.vad_threshold

if options.vad_min_speech_duration_ms:
vad_parameters["min_speech_duration_ms"] = (
Expand Down
2 changes: 1 addition & 1 deletion src/whisper_ctranslate2/whisper_ctranslate2.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def get_transcription_options(args):
print_colors=args.pop("print_colors"),
hallucination_silence_threshold=args.pop("hallucination_silence_threshold"),
vad_filter=args.pop("vad_filter"),
vad_onset=args.pop("vad_onset"),
vad_threshold=args.pop("vad_threshold"),
vad_min_speech_duration_ms=args.pop("vad_min_speech_duration_ms"),
vad_max_speech_duration_s=args.pop("vad_max_speech_duration_s"),
vad_min_silence_duration_ms=args.pop("vad_min_silence_duration_ms"),
Expand Down

0 comments on commit 3ac4fe5

Please sign in to comment.