Skip to content

Commit

Permalink
Update Pascal API to support max speech duration in VAD
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Sep 14, 2024
1 parent 6c964f0 commit be4aedd
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/dot-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ jobs:
git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
git fetch
git pull
mkdir -p windows-for-dotnet
cp -v ../sherpa-onnx-*.tar.bz2 ./windows-for-dotnet
Expand Down
1 change: 1 addition & 0 deletions java-api-examples/VadRemoveSilence.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public static void main(String[] args) {
.setMinSilenceDuration(0.25f)
.setMinSpeechDuration(0.5f)
.setWindowSize(512)
.setMaxSpeechDuration(5.0f)
.build();

VadModelConfig config =
Expand Down
3 changes: 2 additions & 1 deletion lazarus-examples/generate_subtitles/my_init.pas
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
WindowSize := 512; {Please don't change it unless you know the details}

Config.SileroVad.Model := VadFilename;
Config.SileroVad.MinSpeechDuration := 0.5;
Config.SileroVad.MinSpeechDuration := 0.25;
Config.SileroVad.MinSilenceDuration := 0.5;
Config.SileroVad.MaxSpeechDuration := 5.0;
Config.SileroVad.Threshold := 0.5;
Config.SileroVad.WindowSize := WindowSize;
Config.NumThreads:= 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ def main():

config = sherpa_onnx.VadModelConfig()
config.silero_vad.model = args.silero_vad_model
config.silero_vad.threshold = 0.5
config.silero_vad.min_silence_duration = 0.25 # seconds
config.silero_vad.min_speech_duration = 0.25 # seconds

# If the current speech segment grows longer than this value, the VAD
# raises its threshold to 0.9 internally. After this segment has been
# detected, the threshold is reset to its original value.
config.silero_vad.max_speech_duration = 5 # seconds

config.sample_rate = sample_rate

window_size = config.silero_vad.window_size
Expand Down
9 changes: 7 additions & 2 deletions sherpa-onnx/pascal-api/sherpa_onnx.pas
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ TSherpaOnnxSileroVadModelConfig = record
MinSilenceDuration: Single;
MinSpeechDuration: Single;
WindowSize: Integer;
MaxSpeechDuration: Single;
function ToString: AnsiString;
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig);
end;
Expand Down Expand Up @@ -594,6 +595,7 @@ SherpaOnnxSileroVadModelConfig = record
MinSilenceDuration: cfloat;
MinSpeechDuration: cfloat;
WindowSize: cint32;
MaxSpeechDuration: cfloat;
end;
SherpaOnnxVadModelConfig = record
SileroVad: SherpaOnnxSileroVadModelConfig;
Expand Down Expand Up @@ -1402,10 +1404,11 @@ function TSherpaOnnxSileroVadModelConfig.ToString: AnsiString;
'Threshold := %.2f, ' +
'MinSilenceDuration := %.2f, ' +
'MinSpeechDuration := %.2f, ' +
'WindowSize := %d' +
'WindowSize := %d, ' +
'MaxSpeechDuration := %.2f' +
')',
[Self.Model, Self.Threshold, Self.MinSilenceDuration,
Self.MinSpeechDuration, Self.WindowSize
Self.MinSpeechDuration, Self.WindowSize, Self.MaxSpeechDuration
]);
end;

Expand All @@ -1415,6 +1418,7 @@ function TSherpaOnnxSileroVadModelConfig.ToString: AnsiString;
Dest.MinSilenceDuration := 0.5;
Dest.MinSpeechDuration := 0.25;
Dest.WindowSize := 512;
Dest.MaxSpeechDuration := 5.0;
end;

function TSherpaOnnxVadModelConfig.ToString: AnsiString;
Expand Down Expand Up @@ -1569,6 +1573,7 @@ constructor TSherpaOnnxVoiceActivityDetector.Create(Config: TSherpaOnnxVadModelC
C.SileroVad.MinSilenceDuration := Config.SileroVad.MinSilenceDuration;
C.SileroVad.MinSpeechDuration := Config.SileroVad.MinSpeechDuration;
C.SileroVad.WindowSize := Config.SileroVad.WindowSize;
C.SileroVad.MaxSpeechDuration := Config.SileroVad.MaxSpeechDuration;

C.SampleRate := Config.SampleRate;
C.NumThreads := Config.NumThreads;
Expand Down

0 comments on commit be4aedd

Please sign in to comment.