diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml
index 11df53644..09244b2cc 100644
--- a/.github/workflows/lazarus.yaml
+++ b/.github/workflows/lazarus.yaml
@@ -5,6 +5,7 @@ on:
branches:
- master
- lazarus
+ - fix-lazarus
paths:
- '.github/workflows/lazarus.yaml'
- 'CMakeLists.txt'
diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.lpi b/lazarus-examples/generate_subtitles/generate_subtitles.lpi
index 014f8cd17..b2e1a6c70 100644
--- a/lazarus-examples/generate_subtitles/generate_subtitles.lpi
+++ b/lazarus-examples/generate_subtitles/generate_subtitles.lpi
@@ -160,6 +160,10 @@
+
+
+
+
diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.lpr b/lazarus-examples/generate_subtitles/generate_subtitles.lpr
index de4d3c206..2314225c5 100644
--- a/lazarus-examples/generate_subtitles/generate_subtitles.lpr
+++ b/lazarus-examples/generate_subtitles/generate_subtitles.lpr
@@ -11,7 +11,7 @@
athreads,
{$ENDIF}
Interfaces, // this includes the LCL widgetset
- Forms, unit1, my_worker
+ Forms, unit1, my_worker, my_init
{ you can add units after this };
{$R *.res}
diff --git a/lazarus-examples/generate_subtitles/my_init.pas b/lazarus-examples/generate_subtitles/my_init.pas
new file mode 100644
index 000000000..55df79f15
--- /dev/null
+++ b/lazarus-examples/generate_subtitles/my_init.pas
@@ -0,0 +1,358 @@
+unit my_init;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+uses
+ {$IFDEF UNIX}
+ cthreads,
+ cmem,
+ {$ENDIF}
+ {$IFDEF HASAMIGA}
+ athreads,
+ {$ENDIF}
+ Classes, SysUtils;
+
+type
+ TMyInitThread = class(TThread) { worker thread that creates the VAD and the offline recognizer for Form1 }
+ private
+ Status: AnsiString; { text that ShowStatus passes to Form1.UpdateInitStatus }
+ ModelDir: AnsiString; { prefix that Execute prepends to every model file name }
+ procedure ShowStatus; { invoked via Synchronize from Execute }
+
+ protected
+ procedure Execute; override;
+ public
+ Constructor Create(CreateSuspended: Boolean; ModelDirectory: AnsiString); { stores ModelDirectory and enables FreeOnTerminate }
+ end;
+
+var
+ MyInitThread: TMyInitThread; { assigned in TForm1.InitBtnClick; FreeOnTerminate is set, so do not use this reference after the thread has finished }
+
+implementation
+
+uses
+ unit1, sherpa_onnx;
+
+function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
+{ Fills a VAD configuration for the Silero model stored in VadFilename and
+  returns a newly created detector built from it. }
+const
+  kSampleRate = 16000; {Please don't change it unless you know the details}
+  kWindowSize = 512; {Please don't change it unless you know the details}
+var
+  VadConfig: TSherpaOnnxVadModelConfig;
+begin
+  Initialize(VadConfig);
+
+  { Settings that apply to the VAD configuration as a whole. }
+  VadConfig.SampleRate := kSampleRate;
+  VadConfig.NumThreads := 2;
+  VadConfig.Provider := 'cpu';
+  VadConfig.Debug := True;
+  { Settings specific to the Silero model. }
+  VadConfig.SileroVad.Model := VadFilename;
+  VadConfig.SileroVad.Threshold := 0.5;
+  VadConfig.SileroVad.MinSpeechDuration := 0.5;
+  VadConfig.SileroVad.MinSilenceDuration := 0.5;
+  VadConfig.SileroVad.WindowSize := kWindowSize;
+  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30); { 30: presumably the buffer size in seconds - TODO confirm }
+end;
+
+function CreateOfflineRecognizerTransducer(
+  Tokens: AnsiString;
+  Encoder: AnsiString;
+  Decoder: AnsiString;
+  Joiner: AnsiString;
+  ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
+{ Creates an offline recognizer for a transducer model given as separate
+  encoder, decoder and joiner files; ModelType is passed through unchanged. }
+var
+  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
+begin
+  Initialize(RecognizerConfig);
+  { Options shared by every recognizer factory in this unit. }
+  RecognizerConfig.ModelConfig.Tokens := Tokens;
+  RecognizerConfig.ModelConfig.NumThreads := 2;
+  RecognizerConfig.ModelConfig.Provider := 'cpu';
+  RecognizerConfig.ModelConfig.Debug := False;
+  RecognizerConfig.ModelConfig.ModelType := ModelType;
+  RecognizerConfig.ModelConfig.Transducer.Encoder := Encoder;
+  RecognizerConfig.ModelConfig.Transducer.Decoder := Decoder;
+  RecognizerConfig.ModelConfig.Transducer.Joiner := Joiner;
+  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
+end;
+
+function CreateOfflineRecognizerTeleSpeech(
+  Tokens: AnsiString;
+  TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
+{ Creates an offline recognizer that uses the TeleSpeech CTC model file
+  TeleSpeech together with the tokens file Tokens. }
+var
+  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
+begin
+  Initialize(RecognizerConfig);
+
+  RecognizerConfig.ModelConfig.Tokens := Tokens;
+  RecognizerConfig.ModelConfig.NumThreads := 2;
+  RecognizerConfig.ModelConfig.Provider := 'cpu';
+  RecognizerConfig.ModelConfig.Debug := False;
+  RecognizerConfig.ModelConfig.TeleSpeechCtc := TeleSpeech;
+  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
+end;
+
+function CreateOfflineRecognizerParaformer(
+  Tokens: AnsiString;
+  Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
+{ Creates an offline recognizer that uses the Paraformer model file
+  Paraformer together with the tokens file Tokens. }
+var
+  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
+begin
+  Initialize(RecognizerConfig);
+
+  RecognizerConfig.ModelConfig.Tokens := Tokens;
+  RecognizerConfig.ModelConfig.NumThreads := 2;
+  RecognizerConfig.ModelConfig.Provider := 'cpu';
+  RecognizerConfig.ModelConfig.Debug := False;
+  RecognizerConfig.ModelConfig.Paraformer.Model := Paraformer;
+  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
+end;
+
+function CreateOfflineRecognizerSenseVoice(
+  Tokens: AnsiString;
+  SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
+{ Creates an offline recognizer for the SenseVoice model file SenseVoice,
+  with the language set to 'auto' and UseItn switched on. }
+var
+  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
+begin
+  Initialize(RecognizerConfig);
+  RecognizerConfig.ModelConfig.Tokens := Tokens;
+  RecognizerConfig.ModelConfig.NumThreads := 2;
+  RecognizerConfig.ModelConfig.Provider := 'cpu';
+  RecognizerConfig.ModelConfig.Debug := False;
+  RecognizerConfig.ModelConfig.SenseVoice.Model := SenseVoice;
+  RecognizerConfig.ModelConfig.SenseVoice.Language := 'auto';
+  RecognizerConfig.ModelConfig.SenseVoice.UseItn := True;
+  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
+end;
+
+function CreateOfflineRecognizerWhisper(
+  Tokens: AnsiString;
+  WhisperEncoder: AnsiString;
+  WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
+{ Creates an offline recognizer for a Whisper model given as separate
+  encoder and decoder files plus the tokens file Tokens. }
+var
+  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
+begin
+  Initialize(RecognizerConfig);
+  RecognizerConfig.ModelConfig.Tokens := Tokens;
+  RecognizerConfig.ModelConfig.NumThreads := 2;
+  RecognizerConfig.ModelConfig.Provider := 'cpu';
+  RecognizerConfig.ModelConfig.Debug := False;
+  RecognizerConfig.ModelConfig.Whisper.Encoder := WhisperEncoder;
+  RecognizerConfig.ModelConfig.Whisper.Decoder := WhisperDecoder;
+  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
+end;
+
+constructor TMyInitThread.Create(CreateSuspended : boolean; ModelDirectory: AnsiString);
+begin
+  inherited Create(CreateSuspended);
+  FreeOnTerminate := True; { the thread object is released automatically once it terminates }
+  Self.ModelDir := ModelDirectory; { read later by Execute to build the model file names }
+end;
+
+procedure TMyInitThread.ShowStatus;
+begin { Execute runs this through Synchronize to hand the current Status to the form }
+  Form1.UpdateInitStatus(Self.Status);
+end;
+
+procedure TMyInitThread.Execute;
+{
+  Thread body. Builds the model file names under ModelDir, creates the VAD
+  (only when Form1.Vad is still nil) and the first offline recognizer whose
+  files exist, and stores both on Form1. Every outcome is reported through
+  ShowStatus via Synchronize. The success text must keep its exact ending:
+  TForm1.UpdateInitStatus recognises success by that suffix.
+}
+var
+  Msg: AnsiString;
+  VadFilename, Tokens: AnsiString;
+  WhisperEncoder, WhisperDecoder: AnsiString;
+  SenseVoice, Paraformer, TeleSpeech: AnsiString;
+  { The plain Transducer* names refer to icefall models. }
+  TransducerEncoder, TransducerDecoder, TransducerJoiner: AnsiString;
+  NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner: AnsiString;
+begin
+  try
+    VadFilename := ModelDir + 'silero_vad.onnx';
+    Tokens := ModelDir + 'tokens.txt';
+
+    {
+      Please refer to
+      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
+      for a list of whisper models.
+
+      In the code, we use the normalized filenames whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt.
+      You need to rename the existing model files.
+
+      For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
+        mv tiny.en-tokens.txt tokens.txt
+
+        mv tiny.en-encoder.onnx whisper-encoder.onnx
+        mv tiny.en-decoder.onnx whisper-decoder.onnx
+
+      // or use the int8.onnx
+
+        mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
+        mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
+    }
+    WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
+    WhisperDecoder := ModelDir + 'whisper-decoder.onnx';
+
+    {
+      Please refer to
+      https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
+      to download models for SenseVoice.
+
+      In the code, we use the normalized model name sense-voice.onnx. You have
+      to rename the downloaded model files.
+
+      For example, you need to use
+
+        mv model.onnx sense-voice.onnx
+
+      // or use the int8.onnx
+        mv model.int8.onnx sense-voice.onnx
+    }
+
+    SenseVoice := ModelDir + 'sense-voice.onnx';
+
+    {
+      Please refer to
+      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
+      to download paraformer models.
+
+      Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
+      An example is given below for the rename:
+
+        cp model.onnx paraformer.onnx
+
+      // or use int8.onnx
+        cp model.int8.onnx paraformer.onnx
+    }
+    Paraformer := ModelDir + 'paraformer.onnx';
+
+    {
+      Please refer to
+      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
+      to download TeleSpeech models.
+
+      Note that you have to rename model files after downloading. The following
+      is an example
+
+        mv model.onnx telespeech.onnx
+
+      // or to use int8.onnx
+
+        mv model.int8.onnx telespeech.onnx
+    }
+
+    TeleSpeech := ModelDir + 'telespeech.onnx';
+
+    {
+      Please refer to
+      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+      to download an icefall offline transducer model. Note that you need to rename the
+      model files to transducer-encoder.onnx, transducer-decoder.onnx, and
+      transducer-joiner.onnx
+    }
+    TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
+    TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
+    TransducerJoiner := ModelDir + 'transducer-joiner.onnx';
+
+    {
+      Please visit
+      https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+      to download a NeMo transducer model.
+    }
+    NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
+    NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
+    NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';
+
+    if not FileExists(VadFilename) then
+    begin
+      Status := VadFilename + ' does not exist! Please download it from' +
+        sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models';
+      Synchronize(@ShowStatus);
+      Exit;
+    end;
+
+    if Form1.Vad = nil then
+    begin
+      Form1.Vad := CreateVad(VadFilename);
+    end;
+
+    if not FileExists(Tokens) then
+    begin
+      Status := Tokens + ' not found. Please download a non-streaming ASR model first!';
+      Synchronize(@ShowStatus);
+      Exit;
+    end;
+
+    if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
+    begin
+      Form1.OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
+      Msg := 'Whisper';
+    end
+    else if FileExists(SenseVoice) then
+    begin
+      Form1.OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
+      Msg := 'SenseVoice';
+    end
+    else if FileExists(Paraformer) then
+    begin
+      Form1.OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
+      Msg := 'Paraformer';
+    end
+    else if FileExists(TeleSpeech) then
+    begin
+      Form1.OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
+      Msg := 'TeleSpeech';
+    end
+    else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
+    begin
+      Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
+        TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
+      Msg := 'Zipformer transducer';
+    end
+    else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
+    begin
+      Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
+        NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
+      Msg := 'NeMo transducer';
+    end
+    else
+    begin
+      Status := 'Please download at least one non-streaming speech recognition model first.';
+      Synchronize(@ShowStatus);
+      Exit;
+    end;
+
+    Status := 'Congratulations! The ' + Msg + ' model is initialized succesfully!';
+    Synchronize(@ShowStatus);
+  except
+    on E: Exception do
+    begin
+      { Report the failure; otherwise it would be silent and InitBtn would stay disabled. }
+      Status := 'Failed to initialize the model: ' + E.Message;
+      Synchronize(@ShowStatus);
+    end;
+  end;
+end;
+
+end.
+
diff --git a/lazarus-examples/generate_subtitles/unit1.pas b/lazarus-examples/generate_subtitles/unit1.pas
index 98c2cd386..3be95adf1 100644
--- a/lazarus-examples/generate_subtitles/unit1.pas
+++ b/lazarus-examples/generate_subtitles/unit1.pas
@@ -41,6 +41,7 @@ TForm1 = class(TForm)
StopTime: Single;
TotalDuration: Single);
procedure UpdateProgress(StopTime: Single; TotalDuration: Single);
+ procedure UpdateInitStatus(Status: AnsiString);
public
Vad: TSherpaOnnxVoiceActivityDetector;
OfflineRecognizer: TSherpaOnnxOfflineRecognizer;
@@ -52,7 +53,8 @@ TForm1 = class(TForm)
implementation
uses
- my_worker
+ my_worker,
+ my_init
{$IFDEF DARWIN}
,MacOSAll
,CocoaAll
@@ -76,128 +78,7 @@ function GetResourcesPath(): AnsiString;
end;
{$ENDIF}
-function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
-var
- Config: TSherpaOnnxVadModelConfig;
-
- SampleRate: Integer;
- WindowSize: Integer;
-begin
- Initialize(Config);
-
- SampleRate := 16000; {Please don't change it unless you know the details}
- WindowSize := 512; {Please don't change it unless you know the details}
-
- Config.SileroVad.Model := VadFilename;
- Config.SileroVad.MinSpeechDuration := 0.5;
- Config.SileroVad.MinSilenceDuration := 0.5;
- Config.SileroVad.Threshold := 0.5;
- Config.SileroVad.WindowSize := WindowSize;
- Config.NumThreads:= 2;
- Config.Debug:= True;
- Config.Provider:= 'cpu';
- Config.SampleRate := SampleRate;
-
- Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
-end;
-
-function CreateOfflineRecognizerTransducer(
- Tokens: AnsiString;
- Encoder: AnsiString;
- Decoder: AnsiString;
- Joiner: AnsiString;
- ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
-var
- Config: TSherpaOnnxOfflineRecognizerConfig;
-begin
- Initialize(Config);
-
- Config.ModelConfig.Transducer.Encoder := Encoder;
- Config.ModelConfig.Transducer.Decoder := Decoder;
- Config.ModelConfig.Transducer.Joiner := Joiner;
-
- Config.ModelConfig.ModelType := ModelType;
- Config.ModelConfig.Tokens := Tokens;
- Config.ModelConfig.Provider := 'cpu';
- Config.ModelConfig.NumThreads := 2;
- Config.ModelConfig.Debug := False;
-
- Result := TSherpaOnnxOfflineRecognizer.Create(Config);
-end;
-
-function CreateOfflineRecognizerTeleSpeech(
- Tokens: AnsiString;
- TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
-var
- Config: TSherpaOnnxOfflineRecognizerConfig;
-begin
- Initialize(Config);
-
- Config.ModelConfig.TeleSpeechCtc := TeleSpeech;
-
- Config.ModelConfig.Tokens := Tokens;
- Config.ModelConfig.Provider := 'cpu';
- Config.ModelConfig.NumThreads := 2;
- Config.ModelConfig.Debug := False;
-
- Result := TSherpaOnnxOfflineRecognizer.Create(Config);
-end;
-
-function CreateOfflineRecognizerParaformer(
- Tokens: AnsiString;
- Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
-var
- Config: TSherpaOnnxOfflineRecognizerConfig;
-begin
- Initialize(Config);
-
- Config.ModelConfig.Paraformer.Model := Paraformer;
-
- Config.ModelConfig.Tokens := Tokens;
- Config.ModelConfig.Provider := 'cpu';
- Config.ModelConfig.NumThreads := 2;
- Config.ModelConfig.Debug := False;
-
- Result := TSherpaOnnxOfflineRecognizer.Create(Config);
-end;
-
-function CreateOfflineRecognizerSenseVoice(
- Tokens: AnsiString;
- SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
-var
- Config: TSherpaOnnxOfflineRecognizerConfig;
-begin
- Initialize(Config);
- Config.ModelConfig.SenseVoice.Model := SenseVoice;
- Config.ModelConfig.SenseVoice.Language := 'auto';
- Config.ModelConfig.SenseVoice.UseItn := True;
- Config.ModelConfig.Tokens := Tokens;
- Config.ModelConfig.Provider := 'cpu';
- Config.ModelConfig.NumThreads := 2;
- Config.ModelConfig.Debug := False;
-
- Result := TSherpaOnnxOfflineRecognizer.Create(Config);
-end;
-
-function CreateOfflineRecognizerWhisper(
- Tokens: AnsiString;
- WhisperEncoder: AnsiString;
- WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
-var
- Config: TSherpaOnnxOfflineRecognizerConfig;
-begin
- Initialize(Config);
-
- Config.ModelConfig.Whisper.Encoder := WhisperEncoder;
- Config.ModelConfig.Whisper.Decoder := WhisperDecoder;
- Config.ModelConfig.Tokens := Tokens;
- Config.ModelConfig.Provider := 'cpu';
- Config.ModelConfig.NumThreads := 2;
- Config.ModelConfig.Debug := False;
-
- Result := TSherpaOnnxOfflineRecognizer.Create(Config);
-end;
{$R *.lfm}
@@ -256,7 +137,7 @@ procedure TForm1.FileNameEdtChange(Sender: TObject);
procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction);
begin
- if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then
+ if (MyWorkerThread <> nil) and (not MyWorkerThread.Finished) then
begin
MyWorkerThread.Terminate;
MyWorkerThread.WaitFor;
@@ -310,29 +191,35 @@ procedure TForm1.UpdateResult(
Form1.ResultMemo.Lines.Add(NewResult);
end;
-procedure TForm1.InitBtnClick(Sender: TObject);
-var
- Msg: AnsiString;
- ModelDir: AnsiString;
- VadFilename: AnsiString;
- Tokens: AnsiString;
-
- WhisperEncoder: AnsiString;
- WhisperDecoder: AnsiString;
-
- SenseVoice: AnsiString;
-
- Paraformer: AnsiString;
+procedure TForm1.UpdateInitStatus(Status: AnsiString);
+{ Displays the result reported by TMyInitThread. Success is detected by the
+  exact suffix below, so it must stay in sync with TMyInitThread.Execute. }
+begin
+  if EndsStr('model is initialized succesfully!', Status) then
+  begin
+    ResultMemo.Lines.Add(Status);
+    ResultMemo.Lines.Add('Please select a 16000Hz wave file to generate subtitles');
+    ResultMemo.Lines.Add('You can download some test wave files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models');
+    ResultMemo.Lines.Add('For instance:');
+    ResultMemo.Lines.Add(' Chinese test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav');
+    ResultMemo.Lines.Add(' English test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav');
- TeleSpeech: AnsiString;
+    FileNameEdt.Enabled := True;
+    SelectFileBtn.Enabled := True;
+  end else begin
+    ShowMessage(Status);
+    ResultMemo.Lines.Clear();
+    ResultMemo.Lines.Add('Please refer to');
+    ResultMemo.Lines.Add('https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-models');
+    ResultMemo.Lines.Add('for how to download models');
- TransducerEncoder: AnsiString; // from icefall
- TransducerDecoder: AnsiString;
- TransducerJoiner: AnsiString;
+    InitBtn.Enabled := True; { allow another attempt once the model files are in place }
+  end;
+end;
- NeMoTransducerEncoder: AnsiString;
- NeMoTransducerDecoder: AnsiString;
- NeMoTransducerJoiner: AnsiString;
+procedure TForm1.InitBtnClick(Sender: TObject);
+var
+ ModelDir: AnsiString;
begin
{$IFDEF DARWIN}
ModelDir := GetResourcesPath;
@@ -340,162 +227,10 @@ procedure TForm1.InitBtnClick(Sender: TObject);
ModelDir := './';
{$ENDIF}
- VadFilename := ModelDir + 'silero_vad.onnx';
- Tokens := ModelDir + 'tokens.txt';
-
- {
- Please refer to
- https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
- for a list of whisper models.
-
- In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
- You need to rename the existing model files.
-
- For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
- mv tiny.en-tokens.txt tokens.txt
-
- mv tiny.en-encoder.onnx whisper-encoder.onnx
- mv tiny.en-decoder.onnx whisper-decoder.onnx
-
- // or use the int8.onnx
-
- mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
- mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
- }
- WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
- WhisperDecoder := ModelDir + 'whisper-decoder.onnx';
-
-
- {
- Please refer to
- https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
- to download models for SenseVoice.
-
- In the code, we use the normalized model name sense-voice.onnx. You have
- to rename the downloaded model files.
-
- For example, you need to use
-
- mv model.onnx sense-voice.onnx
-
- // or use the int8.onnx
- mv model.int8.onnx sense-voice.onnx
- }
-
- SenseVoice := ModelDir + 'sense-voice.onnx';
-
- {
- Please refer to
- https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
- to download paraformer models.
-
- Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
- An example is given below for the rename:
-
- cp model.onnx paraformer.onnx
-
- // or use int8.onnx
- cp model.int8.onnx paraformer.onnx
- }
- Paraformer := ModelDir + 'paraformer.onnx';
-
-
- {
- please refer to
- https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
- to download TeleSpeech models.
-
- Note that you have to rename model files after downloading. The following
- is an example
-
- mv model.onnx telespeech.onnx
-
- // or to use int8.onnx
-
- mv model.int8.onnx telespeech.onnx
- }
-
- TeleSpeech := ModelDir + 'telespeech.onnx';
-
-
- {
- Please refer to
- https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
- to download an icefall offline transducer model. Note that you need to rename the
- model files to transducer-encoder.onnx, transducer-decoder.onnx, and
- transducer-joiner.onnx
- }
- TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
- TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
- TransducerJoiner := ModelDir + 'transducer-joiner.onnx';
-
- {
- Please visit
- https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
- to donwload a NeMo transducer model.
- }
- NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
- NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
- NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';
-
- if not FileExists(VadFilename) then
- begin
- ShowMessage(VadFilename + ' does not exist! Please download it from' +
- sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models'
- );
- Exit;
- end;
-
- Self.Vad := CreateVad(VadFilename);
-
- if not FileExists(Tokens) then
- begin
- ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!');
- Exit;
- end;
-
- if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
- begin
- OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
- Msg := 'Whisper';
- end
- else if FileExists(SenseVoice) then
- begin
- OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
- Msg := 'SenseVoice';
- end
- else if FileExists(Paraformer) then
- begin
- OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
- Msg := 'Paraformer';
- end
- else if FileExists(TeleSpeech) then
- begin
- OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
- Msg := 'TeleSpeech';
- end
- else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
- begin
- OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
- TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
- Msg := 'Zipformer transducer';
- end
- else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
- begin
- OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
- NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
- Msg := 'NeMo transducer';
- end
- else
- begin
- ShowMessage('Please download at least one non-streaming speech recognition model first.');
- Exit;
- end;
-
- MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0);
- FileNameEdt.Enabled := True;
- SelectFileBtn.Enabled := True;
- InitBtn.Enabled := False;
+  InitBtn.Enabled := False; { re-enabled by UpdateInitStatus when initialization fails }
+  ResultMemo.Lines.Clear();
+  ResultMemo.Lines.Add('Initializing the model. Please wait...');
+  MyInitThread := TMyInitThread.Create(False, ModelDir); { not suspended: starts right away; FreeOnTerminate is set in its constructor }
end;
end.
diff --git a/sherpa-onnx/csrc/silero-vad-model-config.cc b/sherpa-onnx/csrc/silero-vad-model-config.cc
index 6589361ea..7fa844f27 100644
--- a/sherpa-onnx/csrc/silero-vad-model-config.cc
+++ b/sherpa-onnx/csrc/silero-vad-model-config.cc
@@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const {
std::string SileroVadModelConfig::ToString() const {
std::ostringstream os;
- os << "SilerVadModelConfig(";
+ os << "SileroVadModelConfig(";
os << "model=\"" << model << "\", ";
os << "threshold=" << threshold << ", ";
os << "min_silence_duration=" << min_silence_duration << ", ";
diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas b/sherpa-onnx/pascal-api/sherpa_onnx.pas
index 26d47fddf..37785251a 100644
--- a/sherpa-onnx/pascal-api/sherpa_onnx.pas
+++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas
@@ -98,6 +98,7 @@ TSherpaOnnxOnlineStream = class
destructor Destroy; override;
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
procedure InputFinished;
+ property GetHandle: Pointer Read Handle; { read-only view of Handle (no Write clause) }
end;
TSherpaOnnxOnlineRecognizer = class
@@ -116,6 +117,7 @@ TSherpaOnnxOnlineRecognizer = class
function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config;
+ property GetHandle: Pointer Read Handle; { read-only view of Handle (no Write clause) }
end;
TSherpaOnnxOfflineTransducerModelConfig = record
@@ -213,6 +215,7 @@ TSherpaOnnxOfflineStream = class
constructor Create(P: Pointer);
destructor Destroy; override;
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
+ property GetHandle: Pointer Read Handle; { read-only view of Handle (no Write clause) }
end;
TSherpaOnnxOfflineRecognizer = class
@@ -226,6 +229,7 @@ TSherpaOnnxOfflineRecognizer = class
procedure Decode(Stream: TSherpaOnnxOfflineStream);
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
+ property GetHandle: Pointer Read Handle; { read-only view of Handle (no Write clause) }
end;
TSherpaOnnxSileroVadModelConfig = record
@@ -262,6 +266,7 @@ TSherpaOnnxCircularBuffer = class
procedure Reset;
function Size: Integer;
function Head: Integer;
+ property GetHandle: Pointer Read Handle; { read-only view of Handle (no Write clause) }
end;
TSherpaOnnxSpeechSegment = record
@@ -286,6 +291,7 @@ TSherpaOnnxVoiceActivityDetector = class
procedure Reset;
procedure Flush;
property Config: TSherpaOnnxVadModelConfig Read _Config;
+ property GetHandle: Pointer Read Handle; { read-only view of Handle (no Write clause) }
end;
{ It supports reading a single channel wave with 16-bit encoded samples.