diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index f389f09f..1efd2eb0 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -388,6 +388,10 @@ def transcribe( audio = decode_audio(audio, sampling_rate=sampling_rate) duration = audio.shape[0] / sampling_rate + self.model.logger.info( + "Processing audio with duration %s", format_timestamp(duration) + ) + chunk_length = chunk_length or self.model.feature_extractor.chunk_length # if no segment split is provided, use vad_model and generate segments if not clip_timestamps: @@ -421,6 +425,11 @@ def transcribe( / sampling_rate ) + self.model.logger.info( + "VAD filter removed %s of audio", + format_timestamp(duration - duration_after_vad), + ) + audio_chunks, chunks_metadata = collect_chunks(audio, clip_timestamps) features = ( [self.model.feature_extractor(chunk)[..., :-1] for chunk in audio_chunks]