-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
138 lines (121 loc) · 3.86 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package main
import (
"context"
"fmt"
"io"
"net/http"
_ "net/http/pprof"
"os"
"time"
"fyne.io/fyne/v2"
"fyne.io/fyne/v2/app"
"github.com/facebookincubator/go-belt"
"github.com/facebookincubator/go-belt/tool/logger"
"github.com/facebookincubator/go-belt/tool/logger/implementation/logrus"
"github.com/spf13/pflag"
"github.com/xaionaro-go/audio/pkg/audio"
_ "github.com/xaionaro-go/audio/pkg/audio/backends/oto"
"github.com/xaionaro-go/observability"
"github.com/xaionaro-go/player/pkg/player/builtin"
"github.com/xaionaro-go/speech/pkg/speech"
"github.com/xaionaro-go/speech/pkg/speech/speechtotext/implementations/whisper/consts"
"github.com/xaionaro-go/speech/pkg/subtitleswindow"
)
// syntaxExit reports a command-line usage problem and terminates the process.
//
// It writes message to stderr with a "syntax error: " prefix, prints the flag
// usage via pflag.Usage and exits with status code 2. It never returns.
func syntaxExit(message string) {
	// Exit status used for invalid command-line invocations.
	const usageExitCode = 2

	stderr := os.Stderr
	fmt.Fprintf(stderr, "syntax error: %s\n", message)
	pflag.Usage()
	os.Exit(usageExitCode)
}
// main parses the command line, sets up logging, selects an audio source and
// then either loops the audio back to an output device (--audio-loopback) or
// opens a subtitles window fed by that audio.
//
// Positional arguments: whisper-model-path [input]. When input is omitted the
// audio is captured through audio.NewRecorderAuto; otherwise input is opened
// with the builtin media player and its PCM output is used as the source.
//
// Invalid invocations (wrong argument count, unknown --text-align value) are
// reported through syntaxExit. Failures while preparing the model, the audio
// pipeline or the window cause a panic carrying the wrapped error.
func main() {
	loggerLevel := logger.LevelDebug
	pflag.Var(&loggerLevel, "log-level", "Log level")
	langFlag := pflag.String("language", "en-US", "")
	shouldTranslateFlag := pflag.Bool("translate", false, "")
	netPprofAddr := pflag.String("net-pprof-listen-addr", "", "an address to listen for incoming net/pprof connections")
	playbackFlag := pflag.Bool("audio-loopback", false, "[debug] instead of running a subtitles window, playback the audio")
	remoteFlag := pflag.String("remote-addr", "", "use a remote speech-to-text engine, instead of running it locally")
	textAlignmentFlag := pflag.String("text-align", "center", "allowed values: left, center, right")
	vadThreshold := pflag.Float64("vad-threshold", 0.5, "set to <=0 to disable VAD")
	gpuFlag := pflag.Int("gpu", -1, "")
	pflag.Parse()

	if pflag.NArg() < 1 || pflag.NArg() > 2 {
		syntaxExit("expected one or two arguments: whisper-model-path [input]")
	}
	whisperModelPath := pflag.Arg(0)
	var mediaURL string
	if pflag.NArg() == 2 {
		mediaURL = pflag.Arg(1)
	}

	// Validate --text-align together with the other command-line checks, before
	// any deferred cleanup is registered: syntaxExit calls os.Exit, which does
	// not run deferred functions. Unknown values used to be accepted silently
	// and left textAlignment at the zero value of fyne.TextAlign.
	var textAlignment fyne.TextAlign
	switch *textAlignmentFlag {
	case "left":
		textAlignment = fyne.TextAlignLeading
	case "center":
		textAlignment = fyne.TextAlignCenter
	case "right":
		textAlignment = fyne.TextAlignTrailing
	default:
		syntaxExit(fmt.Sprintf("unexpected value of --text-align: %q (allowed values: left, center, right)", *textAlignmentFlag))
	}

	// Install the logger both into the context and as the package-wide default.
	l := logrus.Default().WithLevel(loggerLevel)
	ctx := logger.CtxWithLogger(context.Background(), l)
	logger.Default = func() logger.Logger {
		return l
	}
	defer belt.Flush(ctx)

	// The blank import of net/http/pprof registers its handlers on the default
	// mux, which is what a nil handler serves.
	if *netPprofAddr != "" {
		observability.Go(ctx, func() { l.Error(http.ListenAndServe(*netPprofAddr, nil)) })
	}

	// The model file is read when no remote engine is configured, and also
	// whenever a non-empty model path was given alongside --remote-addr.
	var whisperModel []byte
	if *remoteFlag == "" || whisperModelPath != "" {
		var err error
		whisperModel, err = os.ReadFile(whisperModelPath)
		if err != nil {
			panic(fmt.Errorf("reading the whisper model %q: %w", whisperModelPath, err))
		}
	}

	audioEnc := consts.AudioEncoding()
	audioChannels := consts.AudioChannels

	var audioInput io.Reader
	if mediaURL == "" {
		// No input given: record PCM into a pipe and consume its read end.
		r, w := io.Pipe()
		recorder := audio.NewRecorderAuto(ctx)
		defer recorder.Close()
		logger.Infof(ctx, "using %T as the audio input", recorder.RecorderPCM)
		stream, err := recorder.RecordPCM(ctx, audioEnc.SampleRate, audioChannels, audioEnc.PCMFormat, w)
		if err != nil {
			panic(fmt.Errorf("starting the audio recording: %w", err))
		}
		audioInput = r
		// Registered after recorder.Close, so the stream is closed first.
		defer func() {
			stream.Close()
		}()
	} else {
		// Input given: the media player writes into rcv, which is then read
		// as the audio source.
		rcv := subtitleswindow.NewDummyPCMPlayer(ctx)
		mediaPlayer := builtin.New(ctx, nil, rcv)
		logger.Debugf(ctx, "builtin.New(ctx, nil, rcv)")
		if err := mediaPlayer.OpenURL(ctx, mediaURL); err != nil {
			panic(fmt.Errorf("opening the media %q: %w", mediaURL, err))
		}
		audioInput = rcv
	}

	if *playbackFlag {
		player := audio.NewPlayerAuto(ctx)
		defer player.Close()
		logger.Infof(ctx, "using %T as the audio output", player.PlayerPCM)
		stream, err := player.PlayPCM(ctx, audioEnc.SampleRate, audioChannels, audioEnc.PCMFormat, time.Millisecond*100, audioInput)
		if err != nil {
			panic(fmt.Errorf("starting the audio playback: %w", err))
		}
		stream.Drain()
		// NOTE(review): nothing visible in this function cancels ctx, so this
		// wait presumably lasts until the process is terminated externally.
		<-ctx.Done()
		// Return instead of calling os.Exit(0): the exit status is still 0,
		// but the deferred Close/Flush calls above get a chance to run.
		return
	}

	// Named fyneApp so the local variable does not shadow the app package.
	fyneApp := app.New()
	window, err := subtitleswindow.New(ctx, fyneApp, "Subtitles", textAlignment, audioInput, *remoteFlag, *gpuFlag, whisperModel, speech.Language(*langFlag), *shouldTranslateFlag, *vadThreshold)
	if err != nil {
		panic(fmt.Errorf("creating the subtitles window: %w", err))
	}
	window.Show()
	logger.Debugf(ctx, "app.Run()")
	fyneApp.Run()
}