-
Notifications
You must be signed in to change notification settings - Fork 424
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Object Pascal examples for recording and playing audio with PortAudio (#1271).
The recording example can be used for speech recognition, while the playing example can be used for text-to-speech. The PortAudio wrapper for Object Pascal is copied from https://github.com/UltraStar-Deluxe/USDX/blob/master/src/lib/portaudio/portaudio.pas
- Loading branch information
1 parent
f93f0ca
commit e34a1a2
Showing
9 changed files
with
1,562 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
test-record | ||
test-play |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Introduction | ||
|
||
[portaudio.pas](./portaudio.pas) | ||
requires that the portaudio library is installed on your system. | ||
|
||
|
||
On macOS, you can use | ||
|
||
```bash | ||
brew install portaudio | ||
``` | ||
|
||
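and it will install `portaudio` into a versioned directory such as `/usr/local/Cellar/portaudio/19.7.0` (on Apple Silicon Macs, Homebrew uses `/opt/homebrew/Cellar` instead; the version number depends on the release you install). |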
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/bin/env bash
#
# Builds the sherpa-onnx C API as a shared library (only when no installed
# copy is found), compiles test-play.pas with Free Pascal and runs the
# resulting ./test-play binary.
#
# Environment:
#   PORTAUDIO_LIB_DIR  Directory containing the PortAudio library. Defaults to
#                      the Homebrew location /usr/local/Cellar/portaudio/19.7.0/lib.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$( cd -- "$SCRIPT_DIR/../.." && pwd )

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
PORTAUDIO_LIB_DIR="${PORTAUDIO_LIB_DIR:-/usr/local/Cellar/portaudio/19.7.0/lib}"

# Every path below is anchored at the script/repository location, so the
# script behaves the same no matter which directory it is invoked from.
cd -- "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

fpc \
  -g \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  -Fl"$PORTAUDIO_LIB_DIR" \
  ./test-play.pas

# Append the previous value only when it is non-empty: a trailing ':' would
# make the dynamic loader search the current directory as well.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./test-play
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/bin/env bash
#
# Builds the sherpa-onnx C API as a shared library (only when no installed
# copy is found), compiles test-record.pas with Free Pascal and runs the
# resulting ./test-record binary.
#
# Environment:
#   PORTAUDIO_LIB_DIR  Directory containing the PortAudio library. Defaults to
#                      the Homebrew location /usr/local/Cellar/portaudio/19.7.0/lib.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$( cd -- "$SCRIPT_DIR/../.." && pwd )

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
PORTAUDIO_LIB_DIR="${PORTAUDIO_LIB_DIR:-/usr/local/Cellar/portaudio/19.7.0/lib}"

# Every path below is anchored at the script/repository location, so the
# script behaves the same no matter which directory it is invoked from.
cd -- "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

fpc \
  -g \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  -Fl"$PORTAUDIO_LIB_DIR" \
  ./test-record.pas

# Append the previous value only when it is non-empty: a trailing ':' would
# make the dynamic loader search the current directory as well.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./test-record
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
{ Copyright (c) 2024 Xiaomi Corporation } | ||
{ | ||
This file shows how to use portaudio for playing. | ||
} | ||
program main; | ||
|
||
{$mode objfpc}{$H+} | ||
|
||
|
||
uses | ||
portaudio, | ||
sherpa_onnx, | ||
dos, | ||
ctypes, | ||
SysUtils; | ||
|
||
var | ||
Version: String; | ||
EnvStr: String; | ||
Status: Integer; | ||
NumDevices: Integer; | ||
DeviceIndex: Integer; | ||
DeviceInfo: PPaDeviceInfo; | ||
I: Integer; | ||
Param: TPaStreamParameters; | ||
Stream: PPaStream; | ||
Wave: TSherpaOnnxWave; | ||
|
||
Buffer: TSherpaOnnxCircularBuffer; | ||
|
||
{ PortAudio output callback: writes the next frameCount mono float samples
  from the global Buffer into output and removes them from Buffer.

  When Buffer holds fewer than frameCount samples, everything that is left is
  copied and the rest of output is filled with zeros.

  Returns paContinue while Buffer still holds samples after this call and
  paComplete once it is empty. input, timeInfo, statusFlags and userData are
  not used. }
function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Chunk: TSherpaOnnxSamplesArray;
  Dest: pcfloat;
  Wanted: Integer;
  Taken: Integer;
  K: Integer;
begin
  Dest := pcfloat(output);
  Wanted := Integer(frameCount);

  { Take as many samples as requested, or whatever is left if that is less. }
  Taken := Buffer.Size;
  if Taken > Wanted then
    Taken := Wanted;

  Chunk := Buffer.Get(Buffer.Head, Taken);
  Buffer.Pop(Taken);

  for K := 0 to Taken - 1 do
    Dest[K] := Chunk[K];

  { Pad the part of the output that could not be served from Buffer. }
  for K := Taken to Wanted - 1 do
    Dest[K] := 0;

  if Buffer.Size <= 0 then
    Result := paComplete
  else
    Result := paContinue;
end;
|
||
|
||
|
||
{ Program entry point: plays ./record.wav through a PortAudio output device.

  1. Initializes PortAudio and selects the default output device; the
     SHERPA_ONNX_MIC_DEVICE environment variable may override the index.
  2. Lists all devices, marking the selected one with '*', and verifies that
     the selected index refers to an existing device.
  3. Reads ./record.wav, pushes its samples into the global Buffer and opens
     a mono float32 output stream that is fed by PlayCallback.
  4. Waits until PlayCallback has drained Buffer, stops the stream so the
     final buffers are played, then closes it and shuts PortAudio down. }
begin
  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    { A value that is not a number leaves the default device selected. }
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    { Name is a C string; it is converted to AnsiString for Format. }
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  { The index may come from the environment, so it can be out of range.
    Pa_GetDeviceInfo returns nil in that case; check before dereferencing. }
  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);
  if DeviceInfo = nil then
  begin
    WriteLn('Invalid device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max output channels ', DeviceInfo^.MaxOutputChannels);

  Wave := SherpaOnnxReadWave('./record.wav');
  if Wave.Samples = nil then
  begin
    WriteLn('Failed to read ./record.wav');
    Pa_Terminate;
    Exit;
  end;

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighOutputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  { The whole file is queued up front; PlayCallback consumes it chunk by chunk. }
  Buffer := TSherpaOnnxCircularBuffer.Create(Length(Wave.Samples));
  Buffer.Push(Wave.Samples);

  Status := Pa_OpenStream(Stream, nil, @Param, Wave.SampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    FreeAndNil(Buffer);
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    { Pa_Terminate also closes the stream that was opened above. }
    Pa_Terminate;
    FreeAndNil(Buffer);
    Exit;
  end;

  while Buffer.Size > 0 do
    Pa_Sleep(100); {sleep for 0.1 second }

  { An empty Buffer only means the last chunk was handed to PortAudio, not
    that it has been played. Closing a stream that is still active discards
    its pending buffers, so stop it first: Pa_StopStream returns only after
    the pending buffers have been played. }
  Status := Pa_StopStream(Stream);
  if Status <> paNoError then
    WriteLn('Failed to stop stream, ', Pa_GetErrorText(Status));

  Status := Pa_CloseStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    FreeAndNil(Buffer);
    Exit;
  end;

  { The callback can no longer run, so the buffer can be released safely. }
  FreeAndNil(Buffer);

  Status := Pa_Terminate;
  if Status <> paNoError then
  begin
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;
end.
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
{ Copyright (c) 2024 Xiaomi Corporation } | ||
{ | ||
This file shows how to use portaudio for recording. | ||
It records for 10 seconds and saves the audio samples to ./record.wav | ||
} | ||
program main; | ||
|
||
{$mode objfpc} | ||
|
||
uses | ||
portaudio, | ||
sherpa_onnx, | ||
dos, | ||
ctypes, | ||
SysUtils; | ||
|
||
var | ||
Version: String; | ||
EnvStr: String; | ||
Status: Integer; | ||
NumDevices: Integer; | ||
DeviceIndex: Integer; | ||
DeviceInfo: PPaDeviceInfo; | ||
I: Integer; | ||
Param: TPaStreamParameters; | ||
SampleRate: Double; | ||
Stream: PPaStream; | ||
|
||
Buffer: TSherpaOnnxCircularBuffer; | ||
AllSamples: TSherpaOnnxSamplesArray; | ||
|
||
{ PortAudio input callback: appends the frameCount float samples found at
  input to the global Buffer and always asks PortAudio to keep the stream
  running by returning paContinue.

  output, timeInfo, statusFlags and userData are not used. }
function RecordCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Captured: pcfloat;
begin
  Captured := pcfloat(input);
  Buffer.Push(Captured, frameCount);

  Result := paContinue;
end;
|
||
|
||
|
||
{ Program entry point: records 10 seconds of audio and saves it to record.wav.

  1. Initializes PortAudio and selects the default input device; the
     SHERPA_ONNX_MIC_DEVICE environment variable may override the index.
  2. Lists all devices, marking the selected one with '*', and verifies that
     the selected index refers to an existing device.
  3. Opens a mono float32 input stream at 48000 Hz whose samples are appended
     to the global Buffer by RecordCallback.
  4. Sleeps for 10 seconds, stops and closes the stream, writes everything in
     Buffer to record.wav and shuts PortAudio down. }
begin
  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultInputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default input device found');
    Pa_Terminate;
    Exit;
  end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    { A value that is not a number leaves the default device selected. }
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    { Name is a C string; it is converted to AnsiString for Format. }
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  { The index may come from the environment, so it can be out of range.
    Pa_GetDeviceInfo returns nil in that case; check before dereferencing. }
  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);
  if DeviceInfo = nil then
  begin
    WriteLn('Invalid device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max input channels ', DeviceInfo^.MaxInputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighInputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  SampleRate := 48000;
  { Room for 20 seconds of samples, twice the 10 seconds that are recorded. }
  Buffer := TSherpaOnnxCircularBuffer.Create(Round(SampleRate) * 20);

  Status := Pa_OpenStream(Stream, @Param, nil, SampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@RecordCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    FreeAndNil(Buffer);
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    { Pa_Terminate also closes the stream that was opened above. }
    Pa_Terminate;
    FreeAndNil(Buffer);
    Exit;
  end;

  WriteLn('Please speak! It will exit after 10 seconds.');
  Pa_Sleep(10000); {sleep for 10 seconds }

  { Closing a stream that is still active discards its pending buffers, so
    stop it explicitly before closing it. }
  Status := Pa_StopStream(Stream);
  if Status <> paNoError then
    WriteLn('Failed to stop stream, ', Pa_GetErrorText(Status));

  Status := Pa_CloseStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    FreeAndNil(Buffer);
    Exit;
  end;

  { Nothing was popped from Buffer, so the recording starts at index 0. }
  AllSamples := Buffer.Get(0, Buffer.Size);

  SherpaOnnxWriteWave('record.wav', AllSamples, Round(SampleRate));
  WriteLn('Saved to record.wav');

  { The callback can no longer run, so the buffer can be released safely. }
  FreeAndNil(Buffer);

  Status := Pa_Terminate;
  if Status <> paNoError then
  begin
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;
end.
|
Oops, something went wrong.