Update to faster-whisper 1.1.1 and update param name from vad_onset to vad_threshold
Softcatala · Jan 1, 2025 · 3ac4fe5 · 3ac4fe5
1 parent 1a4b8ee
commit 3ac4fe5
Show file tree

Hide file tree

Showing 6 changed files with 9 additions and 9 deletions.
diff --git a/Makefile b/Makefile
@@ -6,7 +6,7 @@ run:
 
 install-dependencies-e2e-tests:
 	echo ctranslate2==4.0.0 > constraints.txt
-	pip install --force-reinstall -c constraints.txt faster-whisper==1.1.0
+	pip install --force-reinstall -c constraints.txt faster-whisper==1.1.1
 	echo numpy==1.26 > constraints.txt
 	pip install --force-reinstall -c constraints.txt pyannote.audio==3.3.1
 

diff --git a/e2e-tests/testcmd.py b/e2e-tests/testcmd.py
@@ -164,7 +164,7 @@ def test_options_vad(self):
         with tempfile.TemporaryDirectory() as directory:
             _file = "gossos"
             cmd = (
-                f"cd {directory} && whisper-ctranslate2 {path}/{_file}.mp3 --device cpu --compute_type float32 --vad_filter True --vad_onset 0.5"
+                f"cd {directory} && whisper-ctranslate2 {path}/{_file}.mp3 --device cpu --compute_type float32 --vad_filter True --vad_threshold 0.5"
                 f" --vad_min_speech_duration_ms 2000 --vad_max_speech_duration_s 50000 --output_dir {directory}"
             )
             os.system(cmd)

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 numpy
-faster-whisper>=1.1.0
+faster-whisper>=1.1.1
 ctranslate2
 tqdm
 sounddevice

diff --git a/src/whisper_ctranslate2/commandline.py b/src/whisper_ctranslate2/commandline.py
@@ -381,10 +381,10 @@ def read_command_line():
         )
 
         vad_args.add_argument(
-            "--vad_onset",
+            "--vad_threshold",
             type=float,
             default=None,
-            help="when `vad_filter` is enabled, probabilities above this value are considered as speech. This parameter was called `vad_threshold` before",
+            help="when `vad_filter` is enabled, probabilities above this value are considered as speech.",
         )
 
         vad_args.add_argument(

diff --git a/src/whisper_ctranslate2/transcribe.py b/src/whisper_ctranslate2/transcribe.py
@@ -48,7 +48,7 @@ class TranscriptionOptions(NamedTuple):
     append_punctuations: str
     hallucination_silence_threshold: Optional[float]
     vad_filter: bool
-    vad_onset: Optional[float]
+    vad_threshold: Optional[float]
     vad_min_speech_duration_ms: Optional[int]
     vad_max_speech_duration_s: Optional[int]
     vad_min_silence_duration_ms: Optional[int]
@@ -84,8 +84,8 @@ def _get_colored_text(self, words):
     def _get_vad_parameters_dictionary(self, options):
         vad_parameters = {}
 
-        if options.vad_onset:
-            vad_parameters["onset"] = options.vad_onset
+        if options.vad_threshold:
+            vad_parameters["threshold"] = options.vad_threshold
 
         if options.vad_min_speech_duration_ms:
             vad_parameters["min_speech_duration_ms"] = (

diff --git a/src/whisper_ctranslate2/whisper_ctranslate2.py b/src/whisper_ctranslate2/whisper_ctranslate2.py
@@ -70,7 +70,7 @@ def get_transcription_options(args):
         print_colors=args.pop("print_colors"),
         hallucination_silence_threshold=args.pop("hallucination_silence_threshold"),
         vad_filter=args.pop("vad_filter"),
-        vad_onset=args.pop("vad_onset"),
+        vad_threshold=args.pop("vad_threshold"),
         vad_min_speech_duration_ms=args.pop("vad_min_speech_duration_ms"),
         vad_max_speech_duration_s=args.pop("vad_max_speech_duration_s"),
         vad_min_silence_duration_ms=args.pop("vad_min_silence_duration_ms"),