# config.ini

[Wav2Letter]
# Settings for the acoustic model, the decoder and the audio front end.
# All paths below are relative; NOTE(review): presumably resolved against the
# working directory of the program that reads this file - confirm.

# Prediction letters: the set of characters used as model labels.
# The value contains "_", "-", the lowercase Russian alphabet and a space
# (just before the closing bracket). Do not trim or reorder it.
# NOTE(review): whether the surrounding square brackets are stripped by the
# consumer is not visible here - confirm before editing.
labels = [_-абвгдеёжзийклмнопрстуфхцчшщъыьэюя ]

# Path to the acoustic model file
model_path = data/w2l-16khz.hdf

# Path to the language model file
lm_path = data/vosk/lm.klm

# Path to the lexicon file
lexicon = data/vosk/lexicon.txt

# Path to the prediction tokens file
tokens = data/tokens.txt

# Use CPU for acoustic model inference
# (flag; presumably 1 = enabled, 0 = disabled - confirm against the consumer)
cpu = 0

# Use the greedy decoder instead of the CTC decoder
# (flag; presumably 1 = enabled, 0 = disabled - confirm against the consumer)
greedy = 0

# Beam threshold for the CTC decoder
beam_threshold = 10

# Audio sample rate in Hz; must match the sample rate of the acoustic model
sample_rate = 16000

# Window size in seconds for acoustic model samples
# (0.02 s = 20 ms, i.e. 320 samples at 16000 Hz)
window_size = 0.02

# Window stride in seconds for acoustic model samples
# (0.01 s = 10 ms, i.e. 160 samples at 16000 Hz)
window_stride = 0.01


[Train]
# Settings for training: data manifests, hyperparameters and checkpointing.

# Path to the training manifest (CSV file)
train_manifest = data/train.csv

# Path to the validation manifest (CSV file)
val_manifest = data/val.csv

# Number of training epochs
epochs = 100

# Batch size for training
batch_size = 8

# Initial learning rate (scientific notation: 1e-5 = 0.00001)
learning_rate = 1e-5

# Run training in half precision to reduce memory use
# (flag; presumably 1 = enabled, 0 = disabled - confirm against the consumer)
fp16 = 1

# Name prefix for checkpoint files
checkpoint_name = w2l

# Save a checkpoint every N batches; 0 means never save
checkpoint_per_batch = 1000

# Folder where checkpoints are saved
save_folder = Checkpoints/

# Checkpoint model to continue training from.
# Currently the same file as model_path in [Wav2Letter].
continue_from = data/w2l-16khz.hdf