-
Notifications
You must be signed in to change notification settings - Fork 424
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Golang API for spoken language identification. (#709)
- Loading branch information
1 parent
12efbf7
commit a042f44
Showing
10 changed files
with
242 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
module vad-spoken-language-identification | ||
|
||
go 1.12 |
141 changes: 141 additions & 0 deletions
141
go-api-examples/vad-spoken-language-identification/main.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
iso639 "github.com/barbashov/iso639-3" | ||
"github.com/gordonklaus/portaudio" | ||
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
"log" | ||
) | ||
|
||
func main() { | ||
log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
|
||
// 1. Create VAD | ||
config := sherpa.VadModelConfig{} | ||
|
||
// Please download silero_vad.onnx from | ||
// https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx | ||
|
||
config.SileroVad.Model = "./silero_vad.onnx" | ||
config.SileroVad.Threshold = 0.5 | ||
config.SileroVad.MinSilenceDuration = 0.5 | ||
config.SileroVad.MinSpeechDuration = 0.25 | ||
config.SileroVad.WindowSize = 512 | ||
config.SampleRate = 16000 | ||
config.NumThreads = 1 | ||
config.Provider = "cpu" | ||
config.Debug = 1 | ||
|
||
var bufferSizeInSeconds float32 = 20 | ||
|
||
vad := sherpa.NewVoiceActivityDetector(&config, bufferSizeInSeconds) | ||
defer sherpa.DeleteVoiceActivityDetector(vad) | ||
|
||
// 2. Create spoken language identifier | ||
|
||
c := sherpa.SpokenLanguageIdentificationConfig{} | ||
c.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx" | ||
c.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx" | ||
c.NumThreads = 2 | ||
c.Debug = 1 | ||
c.Provider = "cpu" | ||
|
||
slid := sherpa.NewSpokenLanguageIdentification(&c) | ||
defer sherpa.DeleteSpokenLanguageIdentification(slid) | ||
|
||
err := portaudio.Initialize() | ||
if err != nil { | ||
log.Fatalf("Unable to initialize portaudio: %v\n", err) | ||
} | ||
defer portaudio.Terminate() | ||
|
||
default_device, err := portaudio.DefaultInputDevice() | ||
if err != nil { | ||
log.Fatal("Failed to get default input device: %v\n", err) | ||
} | ||
log.Printf("Selected default input device: %s\n", default_device.Name) | ||
param := portaudio.StreamParameters{} | ||
param.Input.Device = default_device | ||
param.Input.Channels = 1 | ||
param.Input.Latency = default_device.DefaultHighInputLatency | ||
|
||
param.SampleRate = float64(config.SampleRate) | ||
param.FramesPerBuffer = 0 | ||
param.Flags = portaudio.ClipOff | ||
|
||
// you can choose another value for 0.1 if you want | ||
samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second | ||
samples := make([]float32, samplesPerCall) | ||
|
||
s, err := portaudio.OpenStream(param, samples) | ||
if err != nil { | ||
log.Fatalf("Failed to open the stream") | ||
} | ||
|
||
defer s.Close() | ||
chk(s.Start()) | ||
|
||
log.Print("Started! Please speak") | ||
printed := false | ||
|
||
k := 0 | ||
for { | ||
chk(s.Read()) | ||
vad.AcceptWaveform(samples) | ||
|
||
if vad.IsSpeech() && !printed { | ||
printed = true | ||
log.Print("Detected speech\n") | ||
} | ||
|
||
if !vad.IsSpeech() { | ||
printed = false | ||
} | ||
|
||
for !vad.IsEmpty() { | ||
speechSegment := vad.Front() | ||
vad.Pop() | ||
|
||
duration := float32(len(speechSegment.Samples)) / float32(config.SampleRate) | ||
|
||
audio := &sherpa.GeneratedAudio{} | ||
audio.Samples = speechSegment.Samples | ||
audio.SampleRate = config.SampleRate | ||
|
||
// Now decode it | ||
go decode(slid, audio, k) | ||
|
||
k += 1 | ||
|
||
log.Printf("Duration: %.2f seconds\n", duration) | ||
} | ||
} | ||
|
||
chk(s.Stop()) | ||
} | ||
|
||
func decode(slid *sherpa.SpokenLanguageIdentification, audio *sherpa.GeneratedAudio, id int) { | ||
stream := slid.CreateStream() | ||
defer sherpa.DeleteOfflineStream(stream) | ||
|
||
stream.AcceptWaveform(audio.SampleRate, audio.Samples) | ||
result := slid.Compute(stream) | ||
lang := iso639.FromPart1Code(result.Lang).Name | ||
log.Printf("Detected language: %v", lang) | ||
|
||
duration := float32(len(audio.Samples)) / float32(audio.SampleRate) | ||
|
||
filename := fmt.Sprintf("seg-%d-%.2f-seconds-%s.wav", id, duration, lang) | ||
ok := audio.Save(filename) | ||
if ok { | ||
log.Printf("Saved to %s", filename) | ||
} | ||
log.Print("----------\n") | ||
} | ||
|
||
// chk panics with err when err is non-nil and does nothing otherwise.
func chk(err error) {
	if err == nil {
		return
	}
	panic(err)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env bash

# Downloads the models used by main.go when they are not already present,
# then builds and runs the example.

if [ ! -f ./silero_vad.onnx ]; then
  # Use the /raw/ URL: the /blob/ URL serves GitHub's HTML viewer page,
  # not the model file itself.
  curl -SL -O https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

go mod tidy
go build
./vad-spoken-language-identification
2 changes: 2 additions & 0 deletions
2
scripts/go/_internal/vad-spoken-language-identification/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
vad-spoken-language-identification | ||
|
5 changes: 5 additions & 0 deletions
5
scripts/go/_internal/vad-spoken-language-identification/go.mod
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
module vad-spoken-language-identification | ||
|
||
go 1.12 | ||
|
||
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../ |
1 change: 1 addition & 0 deletions
1
scripts/go/_internal/vad-spoken-language-identification/main.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/Users/fangjun/open-source/sherpa-onnx/go-api-examples/vad-spoken-language-identification/main.go |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/Users/fangjun/open-source/sherpa-onnx/go-api-examples/vad-spoken-language-identification/run.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters