Skip to content

Commit

Permalink
improved voicebot example
Browse files (browse the repository at this point in the history)
  • Loading branch information
adrianliechti committed Oct 6, 2024
1 parent 24e8deb commit d6a8525
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 11 deletions.
26 changes: 17 additions & 9 deletions examples/local-voicebot/main.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,7 @@ func main() {

chatmodel := "gpt-4o"
audiomodel := "whisper-1"
speakmodel := "tts-1-hd"
speakmodel := "tts-1"

url := os.Getenv("OPENAI_API_BASE")

Expand All @@ -43,7 +43,7 @@ func main() {
client := openai.NewClient(options...)

messages := []openai.ChatCompletionMessageParamUnion{
openai.SystemMessage("Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacting in a non-English language, start by using the standard accent or dialect familiar to the user. Talk quickly. You should always call a function if you can. Answer as briefly and concisely as possible."),
openai.SystemMessage("Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacting in a non-English language, start by using the standard accent or dialect familiar to the user. Talk quickly. You should always call a function if you can. Answer as briefly and concisely as possible. Keep it short."),
}

for ctx.Err() == nil {
Expand All @@ -58,7 +58,7 @@ func main() {

transcription, err := client.Audio.Transcriptions.New(ctx, openai.AudioTranscriptionNewParams{
Model: openai.F(audiomodel),
File: openai.F[io.Reader](bytes.NewReader(data)),
File: openai.FileParam(bytes.NewReader(data), "file.wav", "audio/wav"),
})

if err != nil {
Expand All @@ -81,6 +81,8 @@ func main() {

completion := openai.ChatCompletionAccumulator{}

print("📣 ")

for stream.Next() {
chunk := stream.Current()
completion.AddChunk(chunk)
Expand All @@ -90,6 +92,8 @@ func main() {
}
}

println()

if err := stream.Err(); err != nil {
println("error:", err.Error())
continue
Expand All @@ -98,8 +102,6 @@ func main() {
message := completion.Choices[0].Message
messages = append(messages, message)

println("📣 " + message.Content)

sayText(ctx, client, speakmodel, message.Content)
}
}
Expand Down Expand Up @@ -154,21 +156,21 @@ func recordChunk(ctx context.Context) ([]byte, error) {
args = []string{
"-f", "avfoundation",
"-i", ":0",
"-af", "silencedetect=noise=-30dB:d=2",
"-af", "silencedetect=noise=-30dB:d=1",
path,
}
case "windows":
args = []string{
"-f", "dshow",
"-i", "audio=default",
"-af", "silencedetect=noise=-30dB:d=2",
"-af", "silencedetect=noise=-30dB:d=1",
path,
}
case "linux":
args = []string{
"-f", "alsa",
"-i", "default",
"-af", "silencedetect=noise=-30dB:d=2",
"-af", "silencedetect=noise=-30dB:d=1",
path,
}
}
Expand Down Expand Up @@ -206,7 +208,13 @@ func recordChunk(ctx context.Context) ([]byte, error) {
}
}

if err := cmd.Process.Kill(); err != nil {
err = cmd.Process.Signal(os.Interrupt)

if err != nil {
err = cmd.Process.Kill()
}

if err != nil {
fmt.Println("Error killing FFmpeg process:", err)
return nil, err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/provider/openai/synthesizer.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,7 +41,8 @@ func (s *Synthesizer) Synthesize(ctx context.Context, content string, options *p
Model: openai.F(s.model),
Input: openai.F(content),

Voice: openai.F(openai.AudioSpeechNewParamsVoiceAlloy),
Voice: openai.F(openai.AudioSpeechNewParamsVoiceAlloy),

ResponseFormat: openai.F(openai.AudioSpeechNewParamsResponseFormatWAV),
})

Expand Down
2 changes: 1 addition & 1 deletion pkg/provider/openai/transcriber.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,7 +41,7 @@ func (t *Transcriber) Transcribe(ctx context.Context, input provider.File, optio

transcription, err := t.transcriptions.New(ctx, openai.AudioTranscriptionNewParams{
Model: openai.F(t.model),
File: openai.F(input.Content),
File: openai.FileParam(input.Content, input.Name, ""),
})

if err != nil {
Expand Down

0 comments on commit d6a8525

Please sign in to comment.