diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index 7d56c5a0..a4ab58cb 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -7,7 +7,7 @@ from dataclasses import asdict, dataclass from inspect import signature from math import ceil -from typing import BinaryIO, Iterable, List, Optional, Tuple, Union, Any +from typing import Any, BinaryIO, Iterable, List, Optional, Tuple, Union from warnings import warn import ctranslate2 @@ -231,7 +231,7 @@ def transcribe( clip_timestamps: Optional[List[dict]] = None, batch_size: int = 16, hotwords: Optional[str] = None, - language_detection_threshold: float= 0.5, + language_detection_threshold: float = 0.5, language_detection_segments: int = 1, ) -> Tuple[Iterable[Segment], TranscriptionInfo]: """transcribe audio in chunks in batched fashion and return with language info. @@ -1782,16 +1782,22 @@ def detect_language( speech_chunks = get_speech_timestamps(audio, vad_parameters) audio_chunks, _ = collect_chunks(audio, speech_chunks) audio = np.concatenate(audio_chunks, axis=0) - assert audio is not None, "Audio have a problem while concatanating the audio_chunks; return None" + assert ( + audio is not None + ), "Audio have a problem while concatanating the audio_chunks; return None" audio = audio[ : language_detection_segments * self.feature_extractor.n_samples ] features = self.feature_extractor(audio) - assert features is not None, "No features extracted from audio file; return None" + assert ( + features is not None + ), "No features extracted from audio file; return None" features = features[ ..., : language_detection_segments * self.feature_extractor.nb_max_frames ] - assert features is not None, "No features extracted when detectting language in audio segments; return None" + assert ( + features is not None + ), "No features extracted when detectting language in audio segments; return None" detected_language_info = {} for i in range(0, features.shape[-1], self.feature_extractor.nb_max_frames): encoder_output = self.encode( @@ -1862,7 +1868,7 @@ def get_compression_ratio(text: str) -> float: def get_suppressed_tokens( tokenizer: Tokenizer, suppress_tokens: Optional[List[int]], -) -> tuple[int, ...]: +) -> Tuple[int, ...]: if suppress_tokens is None or len(suppress_tokens) == 0: suppress_tokens = [] # interpret empty string as an empty list elif -1 in suppress_tokens: