diff --git a/examples/aishell/s0/README.md b/examples/aishell/s0/README.md index 9db2728bf..8eab8da0c 100644 --- a/examples/aishell/s0/README.md +++ b/examples/aishell/s0/README.md @@ -3,17 +3,17 @@ ## Conformer Result * Feature info: using fbank feature, dither=0, cmvn, speed perturb -* Training info: lr 0.002, batch size 16, 8 gpu, acc_grad 4, 200 epochs, dither 0.0 -* Decoding info: ctc_weight 0.6, average_num 30 -* Git hash: 132954a9ce27d0381ed3879c4f43cc158860167e -* Model link: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20210116_conformer_exp.tar.gz +* Training info: lr 0.002, batch size 18, 4 gpu, acc_grad 4, 240 epochs, dither 0.1 +* Decoding info: ctc_weight 0.5, average_num 20 +* Git hash: 919f07c4887ac500168ba84b39b535fd8e58918a +* Model link: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20210204_conformer_exp.tar.gz | decoding mode | CER | |------------------------|------| -| attention decoder | 5.36 | -| ctc greedy search | 5.14 | -| ctc prefix beam search | 5.14 | -| attention rescoring | 4.77 | +| attention decoder | 5.18 | +| ctc greedy search | 4.94 | +| ctc prefix beam search | 4.94 | +| attention rescoring | 4.61 | ## Unified Conformer Result @@ -33,16 +33,17 @@ ## Transformer Result * Feature info: using fbank feature, dither=0, with cmvn, no speed perturb. -* Training info: lr 0.002, batch size 16, 8 gpu, acc_grad 1, 120 epochs, dither 0.0 -* Git hash: fb8e0f8c12b5d547fc22e62365e1e114f059c609 -* Model link: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20210120_transformer_exp.tar.gz +* Training info: lr 0.002, batch size 26, 4 gpu, acc_grad 4, 240 epochs, dither 0.1 +* Decoding info: ctc_weight 0.5, average_num 20 +* Git hash: 919f07c4887ac500168ba84b39b535fd8e58918a +* Model link: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20210204_transformer_exp.tar.gz | decoding mode | CER | |------------------------|------| -| attention decoder | 5.76 | -| ctc greedy search | 6.21 | -| ctc prefix beam search | 6.21 | -| attention rescoring | 5.47 | +| attention decoder | 5.69 | +| ctc greedy search | 5.92 | +| ctc prefix beam search | 5.91 | +| attention rescoring | 5.30 | ## Unified Transformer Result diff --git a/examples/aishell/s0/conf/train_conformer.yaml b/examples/aishell/s0/conf/train_conformer.yaml index e18db0532..f21c255c9 100644 --- a/examples/aishell/s0/conf/train_conformer.yaml +++ b/examples/aishell/s0/conf/train_conformer.yaml @@ -41,10 +41,10 @@ raw_wav: true collate_conf: # waveform level config wav_distortion_conf: - wav_dither: 0.0 + wav_dither: 0.1 wav_distortion_rate: 0.0 distortion_methods: [] - speed_perturb: false + speed_perturb: true feature_extraction_conf: feature_type: 'fbank' mel_bins: 80 @@ -74,7 +74,7 @@ dataset_conf: grad_clip: 5 accum_grad: 4 -max_epoch: 200 +max_epoch: 240 log_interval: 100 optim: adam diff --git a/examples/aishell/s0/conf/train_transformer.yaml b/examples/aishell/s0/conf/train_transformer.yaml index 7c137ad32..7371204d2 100644 --- a/examples/aishell/s0/conf/train_transformer.yaml +++ b/examples/aishell/s0/conf/train_transformer.yaml @@ -36,10 +36,10 @@ raw_wav: true collate_conf: # waveform level config wav_distortion_conf: - wav_dither: 0.0 + wav_dither: 0.1 wav_distortion_rate: 0.0 distortion_methods: [] - speed_perturb: false + speed_perturb: true feature_extraction_conf: feature_type: 'fbank' mel_bins: 80 @@ -47,7 +47,6 @@ collate_conf: frame_length: 25 using_pitch: false # spec level config - # spec_swap: false feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature spec_aug: true spec_aug_conf: @@ -64,12 +63,12 @@ dataset_conf: max_length: 40960 min_length: 0 batch_type: 'static' # static or dynamic - batch_size: 16 + batch_size: 26 sort: true grad_clip: 5 accum_grad: 1 -max_epoch: 120 +max_epoch: 240 log_interval: 100 optim: adam