diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml index 11df53644..09244b2cc 100644 --- a/.github/workflows/lazarus.yaml +++ b/.github/workflows/lazarus.yaml @@ -5,6 +5,7 @@ on: branches: - master - lazarus + - fix-lazarus paths: - '.github/workflows/lazarus.yaml' - 'CMakeLists.txt' diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.lpi b/lazarus-examples/generate_subtitles/generate_subtitles.lpi index 014f8cd17..b2e1a6c70 100644 --- a/lazarus-examples/generate_subtitles/generate_subtitles.lpi +++ b/lazarus-examples/generate_subtitles/generate_subtitles.lpi @@ -160,6 +160,10 @@ + + + + diff --git a/lazarus-examples/generate_subtitles/generate_subtitles.lpr b/lazarus-examples/generate_subtitles/generate_subtitles.lpr index de4d3c206..2314225c5 100644 --- a/lazarus-examples/generate_subtitles/generate_subtitles.lpr +++ b/lazarus-examples/generate_subtitles/generate_subtitles.lpr @@ -11,7 +11,7 @@ athreads, {$ENDIF} Interfaces, // this includes the LCL widgetset - Forms, unit1, my_worker + Forms, unit1, my_worker, my_init { you can add units after this }; {$R *.res} diff --git a/lazarus-examples/generate_subtitles/my_init.pas b/lazarus-examples/generate_subtitles/my_init.pas new file mode 100644 index 000000000..55df79f15 --- /dev/null +++ b/lazarus-examples/generate_subtitles/my_init.pas @@ -0,0 +1,358 @@ +unit my_init; + +{$mode ObjFPC}{$H+} + +interface + +uses + {$IFDEF UNIX} + cthreads, + cmem, + {$ENDIF} + {$IFDEF HASAMIGA} + athreads, + {$ENDIF} + Classes, SysUtils; + +type + TMyInitThread = class(TThread) + private + Status: AnsiString; + ModelDir: AnsiString; + procedure ShowStatus; + + protected + procedure Execute; override; + public + Constructor Create(CreateSuspended: Boolean; ModelDirectory: AnsiString); + end; + +var + MyInitThread: TMyInitThread; + +implementation + +uses + unit1, sherpa_onnx; + +function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector; +var + Config: 
TSherpaOnnxVadModelConfig; + + SampleRate: Integer; + WindowSize: Integer; +begin + Initialize(Config); + + SampleRate := 16000; {Please don't change it unless you know the details} + WindowSize := 512; {Please don't change it unless you know the details} + + Config.SileroVad.Model := VadFilename; + Config.SileroVad.MinSpeechDuration := 0.5; + Config.SileroVad.MinSilenceDuration := 0.5; + Config.SileroVad.Threshold := 0.5; + Config.SileroVad.WindowSize := WindowSize; + Config.NumThreads:= 2; + Config.Debug:= True; + Config.Provider:= 'cpu'; + Config.SampleRate := SampleRate; + + Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30); +end; + +function CreateOfflineRecognizerTransducer( + Tokens: AnsiString; + Encoder: AnsiString; + Decoder: AnsiString; + Joiner: AnsiString; + ModelType: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Transducer.Encoder := Encoder; + Config.ModelConfig.Transducer.Decoder := Decoder; + Config.ModelConfig.Transducer.Joiner := Joiner; + + Config.ModelConfig.ModelType := ModelType; + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerTeleSpeech( + Tokens: AnsiString; + TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.TeleSpeechCtc := TeleSpeech; + + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerParaformer( + Tokens: AnsiString; + Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: 
TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Paraformer.Model := Paraformer; + + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerSenseVoice( + Tokens: AnsiString; + SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.SenseVoice.Model := SenseVoice; + Config.ModelConfig.SenseVoice.Language := 'auto'; + Config.ModelConfig.SenseVoice.UseItn := True; + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +function CreateOfflineRecognizerWhisper( + Tokens: AnsiString; + WhisperEncoder: AnsiString; + WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer; +var + Config: TSherpaOnnxOfflineRecognizerConfig; +begin + Initialize(Config); + + Config.ModelConfig.Whisper.Encoder := WhisperEncoder; + Config.ModelConfig.Whisper.Decoder := WhisperDecoder; + Config.ModelConfig.Tokens := Tokens; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 2; + Config.ModelConfig.Debug := False; + + Result := TSherpaOnnxOfflineRecognizer.Create(Config); +end; + +constructor TMyInitThread.Create(CreateSuspended : boolean; ModelDirectory: AnsiString); +begin + inherited Create(CreateSuspended); + ModelDir := ModelDirectory; + FreeOnTerminate := True; +end; + +procedure TMyInitThread.ShowStatus; +begin + Form1.UpdateInitStatus(Status); +end; + +procedure TMyInitThread.Execute; +var + Msg: AnsiString; + VadFilename: AnsiString; + Tokens: AnsiString; + + WhisperEncoder: AnsiString; + WhisperDecoder: AnsiString; + + SenseVoice: AnsiString; + + Paraformer: 
AnsiString; + + TeleSpeech: AnsiString; + + TransducerEncoder: AnsiString; // from icefall + TransducerDecoder: AnsiString; + TransducerJoiner: AnsiString; + + NeMoTransducerEncoder: AnsiString; + NeMoTransducerDecoder: AnsiString; + NeMoTransducerJoiner: AnsiString; +begin + VadFilename := ModelDir + 'silero_vad.onnx'; + Tokens := ModelDir + 'tokens.txt'; + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models + for a list of whisper models. + + In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt + You need to rename the existing model files. + + For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do + mv tiny.en-tokens.txt tokens.txt + + mv tiny.en-encoder.onnx whisper-encoder.onnx + mv tiny.en-decoder.onnx whisper-decoder.onnx + + // or use the int8.onnx + + mv tiny.en-encoder.int8.onnx whisper-encoder.onnx + mv tiny.en-decoder.int8.onnx whisper-decoder.onnx + } + WhisperEncoder := ModelDir + 'whisper-encoder.onnx'; + WhisperDecoder := ModelDir + 'whisper-decoder.onnx'; + + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models + to download models for SenseVoice. + + In the code, we use the normalized model name sense-voice.onnx. You have + to rename the downloaded model files. + + For example, you need to use + + mv model.onnx sense-voice.onnx + + // or use the int8.onnx + mv model.int8.onnx sense-voice.onnx + } + + SenseVoice := ModelDir + 'sense-voice.onnx'; + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html + to download paraformer models. + + Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx. 
+ An example is given below for the rename: + + cp model.onnx paraformer.onnx + + // or use int8.onnx + cp model.int8.onnx paraformer.onnx + } + Paraformer := ModelDir + 'paraformer.onnx'; + + + { + please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html + to download TeleSpeech models. + + Note that you have to rename model files after downloading. The following + is an example + + mv model.onnx telespeech.onnx + + // or to use int8.onnx + + mv model.int8.onnx telespeech.onnx + } + + TeleSpeech := ModelDir + 'telespeech.onnx'; + + + { + Please refer to + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html + to download an icefall offline transducer model. Note that you need to rename the + model files to transducer-encoder.onnx, transducer-decoder.onnx, and + transducer-joiner.onnx + } + TransducerEncoder := ModelDir + 'transducer-encoder.onnx'; + TransducerDecoder := ModelDir + 'transducer-decoder.onnx'; + TransducerJoiner := ModelDir + 'transducer-joiner.onnx'; + + { + Please visit + https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + to download a NeMo transducer model. + } + NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx'; + NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx'; + NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx'; + + if not FileExists(VadFilename) then + begin + Status := VadFilename + ' does not exist! Please download it from' + + sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models'; + Synchronize(@ShowStatus); + Exit; + end; + + if Form1.Vad = nil then + begin + Form1.Vad := CreateVad(VadFilename); + end; + + if not FileExists(Tokens) then + begin + Status := Tokens + ' not found. 
Please download a non-streaming ASR model first!'; + Synchronize(@ShowStatus); + Exit; + end; + + if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder); + Msg := 'Whisper'; + end + else if FileExists(SenseVoice) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice); + Msg := 'SenseVoice'; + end + else if FileExists(Paraformer) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer); + Msg := 'Paraformer'; + end + else if FileExists(TeleSpeech) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech); + Msg := 'TeleSpeech'; + end + else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, + TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer'); + Msg := 'Zipformer transducer'; + end + else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then + begin + Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, + NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer'); + Msg := 'NeMo transducer'; + end + else + begin + Status := 'Please download at least one non-streaming speech recognition model first.'; + Synchronize(@ShowStatus); + Exit; + end; + + Status := 'Congratulations! The ' + Msg + ' model is initialized succesfully!'; + Synchronize(@ShowStatus); +end; + +end. 
+ diff --git a/lazarus-examples/generate_subtitles/unit1.pas b/lazarus-examples/generate_subtitles/unit1.pas index 98c2cd386..3be95adf1 100644 --- a/lazarus-examples/generate_subtitles/unit1.pas +++ b/lazarus-examples/generate_subtitles/unit1.pas @@ -41,6 +41,7 @@ TForm1 = class(TForm) StopTime: Single; TotalDuration: Single); procedure UpdateProgress(StopTime: Single; TotalDuration: Single); + procedure UpdateInitStatus(Status: AnsiString); public Vad: TSherpaOnnxVoiceActivityDetector; OfflineRecognizer: TSherpaOnnxOfflineRecognizer; @@ -52,7 +53,8 @@ TForm1 = class(TForm) implementation uses - my_worker + my_worker, + my_init {$IFDEF DARWIN} ,MacOSAll ,CocoaAll @@ -76,128 +78,7 @@ function GetResourcesPath(): AnsiString; end; {$ENDIF} -function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector; -var - Config: TSherpaOnnxVadModelConfig; - - SampleRate: Integer; - WindowSize: Integer; -begin - Initialize(Config); - - SampleRate := 16000; {Please don't change it unless you know the details} - WindowSize := 512; {Please don't change it unless you know the details} - - Config.SileroVad.Model := VadFilename; - Config.SileroVad.MinSpeechDuration := 0.5; - Config.SileroVad.MinSilenceDuration := 0.5; - Config.SileroVad.Threshold := 0.5; - Config.SileroVad.WindowSize := WindowSize; - Config.NumThreads:= 2; - Config.Debug:= True; - Config.Provider:= 'cpu'; - Config.SampleRate := SampleRate; - - Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30); -end; - -function CreateOfflineRecognizerTransducer( - Tokens: AnsiString; - Encoder: AnsiString; - Decoder: AnsiString; - Joiner: AnsiString; - ModelType: AnsiString): TSherpaOnnxOfflineRecognizer; -var - Config: TSherpaOnnxOfflineRecognizerConfig; -begin - Initialize(Config); - - Config.ModelConfig.Transducer.Encoder := Encoder; - Config.ModelConfig.Transducer.Decoder := Decoder; - Config.ModelConfig.Transducer.Joiner := Joiner; - - Config.ModelConfig.ModelType := ModelType; - 
Config.ModelConfig.Tokens := Tokens; - Config.ModelConfig.Provider := 'cpu'; - Config.ModelConfig.NumThreads := 2; - Config.ModelConfig.Debug := False; - - Result := TSherpaOnnxOfflineRecognizer.Create(Config); -end; - -function CreateOfflineRecognizerTeleSpeech( - Tokens: AnsiString; - TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer; -var - Config: TSherpaOnnxOfflineRecognizerConfig; -begin - Initialize(Config); - - Config.ModelConfig.TeleSpeechCtc := TeleSpeech; - - Config.ModelConfig.Tokens := Tokens; - Config.ModelConfig.Provider := 'cpu'; - Config.ModelConfig.NumThreads := 2; - Config.ModelConfig.Debug := False; - - Result := TSherpaOnnxOfflineRecognizer.Create(Config); -end; - -function CreateOfflineRecognizerParaformer( - Tokens: AnsiString; - Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer; -var - Config: TSherpaOnnxOfflineRecognizerConfig; -begin - Initialize(Config); - - Config.ModelConfig.Paraformer.Model := Paraformer; - - Config.ModelConfig.Tokens := Tokens; - Config.ModelConfig.Provider := 'cpu'; - Config.ModelConfig.NumThreads := 2; - Config.ModelConfig.Debug := False; - - Result := TSherpaOnnxOfflineRecognizer.Create(Config); -end; - -function CreateOfflineRecognizerSenseVoice( - Tokens: AnsiString; - SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer; -var - Config: TSherpaOnnxOfflineRecognizerConfig; -begin - Initialize(Config); - Config.ModelConfig.SenseVoice.Model := SenseVoice; - Config.ModelConfig.SenseVoice.Language := 'auto'; - Config.ModelConfig.SenseVoice.UseItn := True; - Config.ModelConfig.Tokens := Tokens; - Config.ModelConfig.Provider := 'cpu'; - Config.ModelConfig.NumThreads := 2; - Config.ModelConfig.Debug := False; - - Result := TSherpaOnnxOfflineRecognizer.Create(Config); -end; - -function CreateOfflineRecognizerWhisper( - Tokens: AnsiString; - WhisperEncoder: AnsiString; - WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer; -var - Config: TSherpaOnnxOfflineRecognizerConfig; -begin - Initialize(Config); - - 
Config.ModelConfig.Whisper.Encoder := WhisperEncoder; - Config.ModelConfig.Whisper.Decoder := WhisperDecoder; - Config.ModelConfig.Tokens := Tokens; - Config.ModelConfig.Provider := 'cpu'; - Config.ModelConfig.NumThreads := 2; - Config.ModelConfig.Debug := False; - - Result := TSherpaOnnxOfflineRecognizer.Create(Config); -end; {$R *.lfm} @@ -256,7 +137,7 @@ procedure TForm1.FileNameEdtChange(Sender: TObject); procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction); begin - if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then + if (MyWorkerThread <> nil) and (not MyWorkerThread.Finished) then begin MyWorkerThread.Terminate; MyWorkerThread.WaitFor; @@ -310,29 +191,35 @@ procedure TForm1.UpdateResult( Form1.ResultMemo.Lines.Add(NewResult); end; -procedure TForm1.InitBtnClick(Sender: TObject); -var - Msg: AnsiString; - ModelDir: AnsiString; - VadFilename: AnsiString; - Tokens: AnsiString; - - WhisperEncoder: AnsiString; - WhisperDecoder: AnsiString; - - SenseVoice: AnsiString; - - Paraformer: AnsiString; +procedure TForm1.UpdateInitStatus(Status: AnsiString); +begin + if EndsStr('model is initialized succesfully!', Status) then + begin + Form1.ResultMemo.Lines.Add(Status); + Form1.ResultMemo.Lines.Add('Please select a 16000Hz wave file to generate subtiles'); + Form1.ResultMemo.Lines.Add('You can download some test wave files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models'); + Form1.ResultMemo.Lines.Add('For instance:'); + Form1.ResultMemo.Lines.Add(' Chinese test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav'); + Form1.ResultMemo.Lines.Add(' English test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav'); + FileNameEdt.Enabled := True; + SelectFileBtn.Enabled := True; - TeleSpeech: AnsiString; + end + else + begin + ShowMessage(Status); + Form1.ResultMemo.Lines.Clear(); + Form1.ResultMemo.Lines.Add('Please refer to'); + 
Form1.ResultMemo.Lines.Add('https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-models'); + Form1.ResultMemo.Lines.Add('for how to download models'); - TransducerEncoder: AnsiString; // from icefall - TransducerDecoder: AnsiString; - TransducerJoiner: AnsiString; + InitBtn.Enabled := True; + end; +end; - NeMoTransducerEncoder: AnsiString; - NeMoTransducerDecoder: AnsiString; - NeMoTransducerJoiner: AnsiString; +procedure TForm1.InitBtnClick(Sender: TObject); +var + ModelDir: AnsiString; begin {$IFDEF DARWIN} ModelDir := GetResourcesPath; @@ -340,162 +227,10 @@ procedure TForm1.InitBtnClick(Sender: TObject); ModelDir := './'; {$ENDIF} - VadFilename := ModelDir + 'silero_vad.onnx'; - Tokens := ModelDir + 'tokens.txt'; - - { - Please refer to - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models - for a list of whisper models. - - In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt - You need to rename the existing model files. - - For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do - mv tiny.en-tokens.txt tokens.txt - - mv tiny.en-encoder.onnx whisper-encoder.onnx - mv tiny.en-decoder.onnx whisper-decoder.onnx - - // or use the int8.onnx - - mv tiny.en-encoder.int8.onnx whisper-encoder.onnx - mv tiny.en-decoder.int8.onnx whisper-decoder.onnx - } - WhisperEncoder := ModelDir + 'whisper-encoder.onnx'; - WhisperDecoder := ModelDir + 'whisper-decoder.onnx'; - - - { - Please refer to - https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models - to download models for SenseVoice. - - In the code, we use the normalized model name sense-voice.onnx. You have - to rename the downloaded model files. 
- - For example, you need to use - - mv model.onnx sense-voice.onnx - - // or use the int8.onnx - mv model.int8.onnx sense-voice.onnx - } - - SenseVoice := ModelDir + 'sense-voice.onnx'; - - { - Please refer to - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html - to download paraformer models. - - Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx. - An example is given below for the rename: - - cp model.onnx paraformer.onnx - - // or use int8.onnx - cp model.int8.onnx paraformer.onnx - } - Paraformer := ModelDir + 'paraformer.onnx'; - - - { - please refer to - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html - to download TeleSpeech models. - - Note that you have to rename model files after downloading. The following - is an example - - mv model.onnx telespeech.onnx - - // or to use int8.onnx - - mv model.int8.onnx telespeech.onnx - } - - TeleSpeech := ModelDir + 'telespeech.onnx'; - - - { - Please refer to - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html - to download an icefall offline transducer model. Note that you need to rename the - model files to transducer-encoder.onnx, transducer-decoder.onnx, and - transducer-joiner.onnx - } - TransducerEncoder := ModelDir + 'transducer-encoder.onnx'; - TransducerDecoder := ModelDir + 'transducer-decoder.onnx'; - TransducerJoiner := ModelDir + 'transducer-joiner.onnx'; - - { - Please visit - https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models - to donwload a NeMo transducer model. - } - NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx'; - NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx'; - NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx'; - - if not FileExists(VadFilename) then - begin - ShowMessage(VadFilename + ' does not exist! 
Please download it from' + - sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models' - ); - Exit; - end; - - Self.Vad := CreateVad(VadFilename); - - if not FileExists(Tokens) then - begin - ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!'); - Exit; - end; - - if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then - begin - OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder); - Msg := 'Whisper'; - end - else if FileExists(SenseVoice) then - begin - OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice); - Msg := 'SenseVoice'; - end - else if FileExists(Paraformer) then - begin - OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer); - Msg := 'Paraformer'; - end - else if FileExists(TeleSpeech) then - begin - OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech); - Msg := 'TeleSpeech'; - end - else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then - begin - OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, - TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer'); - Msg := 'Zipformer transducer'; - end - else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then - begin - OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, - NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer'); - Msg := 'NeMo transducer'; - end - else - begin - ShowMessage('Please download at least one non-streaming speech recognition model first.'); - Exit; - end; - - MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0); - FileNameEdt.Enabled := True; - SelectFileBtn.Enabled := True; - InitBtn.Enabled := False; + Form1.ResultMemo.Lines.Clear(); + ResultMemo.Lines.Add('Initializing the model. 
Please wait...'); + MyInitThread := TMyInitThread.Create(False, ModelDir); + InitBtn.Enabled := False; end; end. diff --git a/sherpa-onnx/csrc/silero-vad-model-config.cc b/sherpa-onnx/csrc/silero-vad-model-config.cc index 6589361ea..7fa844f27 100644 --- a/sherpa-onnx/csrc/silero-vad-model-config.cc +++ b/sherpa-onnx/csrc/silero-vad-model-config.cc @@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const { std::string SileroVadModelConfig::ToString() const { std::ostringstream os; - os << "SilerVadModelConfig("; + os << "SileroVadModelConfig("; os << "model=\"" << model << "\", "; os << "threshold=" << threshold << ", "; os << "min_silence_duration=" << min_silence_duration << ", "; diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas b/sherpa-onnx/pascal-api/sherpa_onnx.pas index 26d47fddf..37785251a 100644 --- a/sherpa-onnx/pascal-api/sherpa_onnx.pas +++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas @@ -98,6 +98,7 @@ TSherpaOnnxOnlineStream = class destructor Destroy; override; procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); procedure InputFinished; + property GetHandle: Pointer Read Handle; end; TSherpaOnnxOnlineRecognizer = class @@ -116,6 +117,7 @@ TSherpaOnnxOnlineRecognizer = class function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean; function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config; + property GetHandle: Pointer Read Handle; end; TSherpaOnnxOfflineTransducerModelConfig = record @@ -213,6 +215,7 @@ TSherpaOnnxOfflineStream = class constructor Create(P: Pointer); destructor Destroy; override; procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); + property GetHandle: Pointer Read Handle; end; TSherpaOnnxOfflineRecognizer = class @@ -226,6 +229,7 @@ TSherpaOnnxOfflineRecognizer = class procedure Decode(Stream: TSherpaOnnxOfflineStream); function GetResult(Stream: TSherpaOnnxOfflineStream): 
TSherpaOnnxOfflineRecognizerResult; property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; + property GetHandle: Pointer Read Handle; end; TSherpaOnnxSileroVadModelConfig = record @@ -262,6 +266,7 @@ TSherpaOnnxCircularBuffer = class procedure Reset; function Size: Integer; function Head: Integer; + property GetHandle: Pointer Read Handle; end; TSherpaOnnxSpeechSegment = record @@ -286,6 +291,7 @@ TSherpaOnnxVoiceActivityDetector = class procedure Reset; procedure Flush; property Config: TSherpaOnnxVadModelConfig Read _Config; + property GetHandle: Pointer Read Handle; end; { It supports reading a single channel wave with 16-bit encoded samples.