From aa3eba888be9ddd27763e41af2c702acaf04379a Mon Sep 17 00:00:00 2001 From: Huang Lekai Date: Mon, 3 Jun 2024 12:59:59 +0800 Subject: [PATCH] Using hamming window for Paraformer frontend. --- wenet/cli/paraformer_model.py | 3 ++- wenet/dataset/processor.py | 6 ++++-- .../convert_paraformer_to_wenet_config_and_ckpt.py | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/wenet/cli/paraformer_model.py b/wenet/cli/paraformer_model.py index a80c360f0..a43814a3a 100644 --- a/wenet/cli/paraformer_model.py +++ b/wenet/cli/paraformer_model.py @@ -40,7 +40,8 @@ def transcribe(self, audio_file: str, tokens_info: bool = False) -> dict: frame_length=25, frame_shift=10, energy_floor=0.0, - sample_frequency=self.resample_rate) + sample_frequency=self.resample_rate, + window_type="hamming") feats = feats.unsqueeze(0) feats_lens = torch.tensor([feats.size(1)], dtype=torch.int64, diff --git a/wenet/dataset/processor.py b/wenet/dataset/processor.py index 4d3a80961..4de0a29cf 100644 --- a/wenet/dataset/processor.py +++ b/wenet/dataset/processor.py @@ -231,7 +231,8 @@ def compute_fbank(sample, num_mel_bins=23, frame_length=25, frame_shift=10, - dither=0.0): + dither=0.0, + window_type="povey"): """ Extract fbank Args: @@ -253,7 +254,8 @@ def compute_fbank(sample, frame_shift=frame_shift, dither=dither, energy_floor=0.0, - sample_frequency=sample_rate) + sample_frequency=sample_rate, + window_type=window_type) sample['feat'] = mat return sample diff --git a/wenet/paraformer/convert_paraformer_to_wenet_config_and_ckpt.py b/wenet/paraformer/convert_paraformer_to_wenet_config_and_ckpt.py index 6dee02b08..859613391 100644 --- a/wenet/paraformer/convert_paraformer_to_wenet_config_and_ckpt.py +++ b/wenet/paraformer/convert_paraformer_to_wenet_config_and_ckpt.py @@ -140,6 +140,7 @@ def convert_to_wenet_yaml(configs, wenet_yaml_path: str, configs['dataset_conf']['fbank_conf']['frame_shift'] = 10 configs['dataset_conf']['fbank_conf']['frame_length'] = 25 configs['dataset_conf']['fbank_conf']['dither'] = 0.1 + configs['dataset_conf']['fbank_conf']['window_type'] = 'hamming' configs['dataset_conf']['spec_sub'] = False configs['dataset_conf']['spec_trim'] = False configs['dataset_conf']['shuffle'] = True