-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from Verdanix/create_audio_system
- Loading branch information
Showing
16 changed files
with
998 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <name>audio</name>
    <description>
        The audio module with all the audio components of Nora.
    </description>
    <artifactId>audio</artifactId>
    <packaging>jar</packaging>
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>dev.masonroot</groupId>
        <artifactId>Nora</artifactId>
        <version>1.0.0-BETA</version>
    </parent>

    <properties>
        <logback.classic.version>1.5.12</logback.classic.version>
        <slf4j.version>2.0.16</slf4j.version>
        <vosk.version>0.3.45</vosk.version>
        <!-- NOTE(review): sherpa.onnx.version is referenced below but is not defined here; -->
        <!-- presumably it is inherited from the parent POM - confirm, or define it here. -->
    </properties>


    <dependencies>
        <!-- Common module -->
        <!-- This dependency adds Nora's common module -->
        <!-- Pinned to this build's own version: the LATEST meta-version makes builds -->
        <!-- non-reproducible and can resolve to a stale or mismatched artifact. -->
        <!-- NOTE(review): assumes common inherits its version from the same parent - confirm. -->
        <dependency>
            <groupId>dev.masonroot</groupId>
            <artifactId>common</artifactId>
            <version>${project.version}</version>
        </dependency>

        <!-- Sherpa ONNX -->
        <!-- This dependency adds TTS -->
        <!-- Github: https://github.com/k2-fsa/sherpa-onnx/ -->
        <!-- Documentation: https://k2-fsa.github.io/sherpa/onnx/ -->
        <dependency>
            <groupId>com.k2fsa.sherpa.onnx</groupId>
            <artifactId>sherpa-onnx</artifactId>
            <version>${sherpa.onnx.version}</version>
        </dependency>

        <!-- Vosk STT Engine -->
        <!-- This dependency adds STT using Vosk -->
        <!-- Github: https://github.com/alphacep/vosk-api/ -->
        <!-- Documentation: https://alphacephei.com/vosk/ -->
        <dependency>
            <groupId>com.alphacephei</groupId>
            <artifactId>vosk</artifactId>
            <version>${vosk.version}</version>
        </dependency>
    </dependencies>
</project>
127 changes: 127 additions & 0 deletions
127
audio/src/main/java/dev/masonroot/audio/AudioInterface.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
package dev.masonroot.audio; | ||
|
||
import dev.masonroot.common.NoraLogger; | ||
import java.io.ByteArrayOutputStream; | ||
import javax.sound.sampled.LineUnavailableException; | ||
import javax.sound.sampled.SourceDataLine; | ||
import javax.sound.sampled.TargetDataLine; | ||
import lombok.NonNull; | ||
|
||
/** | ||
* Audio interface for handling audio input and output. | ||
* | ||
* <p>This class provides methods to read audio data from a microphone and write audio data to a | ||
* speaker. It uses the Java Sound API to manage audio input and output streams. | ||
* | ||
* <p><b>Thread Safety:</b> | ||
* | ||
* <ul> | ||
* <li>This class is thread-safe. The read and write methods are synchronized to prevent | ||
* concurrent access. | ||
* </ul> | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>This class implements {@code AutoCloseable} to ensure that audio resources are properly | ||
* released. | ||
* <li>Ensure that the microphone and speaker lines are properly initialized before using this | ||
* class. | ||
* </ul> | ||
*/ | ||
public final record AudioInterface(TargetDataLine microphone, SourceDataLine speaker) | ||
implements AutoCloseable { | ||
|
||
/** | ||
* Constructs an {@code AudioInterface} with the specified microphone and speaker lines. | ||
* | ||
* <p>This constructor initializes the audio interface with the provided {@code TargetDataLine} | ||
* for the microphone and {@code SourceDataLine} for the speaker. Both lines must be properly | ||
* initialized before being passed to this constructor. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To facilitate audio input and output operations by managing the microphone and speaker | ||
* lines. | ||
* </ul> | ||
* | ||
* @param microphone the {@code TargetDataLine} for the microphone; must not be null. | ||
* @param speaker the {@code SourceDataLine} for the speaker; must not be null. | ||
*/ | ||
public AudioInterface( | ||
@NonNull final TargetDataLine microphone, @NonNull final SourceDataLine speaker) { | ||
this.microphone = microphone; | ||
this.speaker = speaker; | ||
} | ||
|
||
/** | ||
* Writes audio data to the speaker. | ||
* | ||
* <p>This method opens the speaker line, starts it, and writes the audio data to the speaker. It | ||
* ensures that the audio data is played through the speaker. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To facilitate audio output by writing audio data to the speaker. | ||
* </ul> | ||
* | ||
* @param data the audio data to write to the speaker as a byte array. | ||
*/ | ||
public synchronized void write(@NonNull final byte[] data) { | ||
try { | ||
if (!this.speaker.isOpen()) { | ||
this.speaker.open(); | ||
} | ||
this.speaker.start(); | ||
this.speaker.write(data, 0, data.length); | ||
} catch (IllegalArgumentException | IllegalStateException | LineUnavailableException e) { | ||
NoraLogger.trace("Failed to write audio data to speaker.", e); | ||
} | ||
} | ||
|
||
/** | ||
* Reads audio data from the microphone for the specified duration. | ||
* | ||
* <p>This method opens the microphone line, starts it, reads audio data from it for the specified | ||
* duration, and then returns the audio data as a byte array. It ensures that the audio data is | ||
* captured from the microphone. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To facilitate audio input by reading audio data from the microphone. | ||
* </ul> | ||
* | ||
* @param timeoutInMs the duration in milliseconds to read audio data from the microphone. | ||
* @return the audio data read from the microphone as a byte array. | ||
*/ | ||
public synchronized byte[] read(final long timeoutInMs) { | ||
final byte[] data = new byte[this.microphone.getBufferSize() / 5]; | ||
final ByteArrayOutputStream out = new ByteArrayOutputStream(); | ||
long elapsedTime = 0; | ||
|
||
try { | ||
if (!this.microphone.isOpen()) { | ||
this.microphone.open(); | ||
} | ||
this.microphone.start(); | ||
final long startTime = System.currentTimeMillis(); | ||
while (elapsedTime < timeoutInMs) { | ||
final int bytesRead = this.microphone.read(data, 0, data.length); | ||
out.write(data, 0, bytesRead); | ||
elapsedTime = System.currentTimeMillis() - startTime; | ||
} | ||
} catch (IllegalArgumentException | IllegalStateException | LineUnavailableException e) { | ||
NoraLogger.trace("Failed to read audio data from microphone.", e); | ||
} | ||
return out.toByteArray(); | ||
} | ||
|
||
@Override | ||
public void close() throws Exception { | ||
this.microphone.close(); | ||
this.speaker.close(); | ||
} | ||
} |
42 changes: 42 additions & 0 deletions
42
audio/src/main/java/dev/masonroot/audio/exceptions/EngineInitializationException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package dev.masonroot.audio.exceptions; | ||
|
||
import lombok.NonNull; | ||
|
||
/** | ||
* Exception thrown when an engine fails to initialize. | ||
* | ||
* <p>This exception is used to indicate issues during the initialization of an engine, such as when | ||
* the model path is invalid or the model cannot be loaded. | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>This exception extends {@code RuntimeException} to provide more specific error information | ||
* related to an engine initialization. | ||
* </ul> | ||
*/ | ||
public class EngineInitializationException extends RuntimeException { | ||
/** | ||
* Constructs a new {@code EngineInitialization} with the specified detail message. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To provide a specific error message related to an engine initialization failures. | ||
* </ul> | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>The {@code @NonNull} annotation indicates that the parameter should not be null. | ||
* </ul> | ||
* | ||
* @param message the detail message; must not be null. | ||
* @param cause the cause of the exception; must not be null. | ||
* @throws NullPointerException if the message is null. | ||
*/ | ||
public EngineInitializationException( | ||
@NonNull final String message, @NonNull final Throwable cause) { | ||
super(message, cause); | ||
} | ||
} |
41 changes: 41 additions & 0 deletions
41
audio/src/main/java/dev/masonroot/audio/exceptions/PiperInitializationException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package dev.masonroot.audio.exceptions; | ||
|
||
import java.nio.file.Path; | ||
|
||
/** | ||
* Exception thrown when the Piper TTS engine fails to initialize with the specified detail message. | ||
* | ||
* <p>This exception is used to indicate issues during the initialization of the Piper | ||
* text-to-speech engine, such as when the model path is invalid or the model cannot be loaded. | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>This exception extends {@code EngineInitializationException} to provide more specific error | ||
* information related to Piper initialization. | ||
* </ul> | ||
*/ | ||
public final class PiperInitializationException extends EngineInitializationException { | ||
|
||
/** | ||
* Constructs a new {@code EngineInitialization} with the specified detail message and cause. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To provide a specific error message related to an engine initialization failures. | ||
* </ul> | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>The {@code @NonNull} annotation indicates that the parameter should not be null. | ||
* </ul> | ||
* | ||
* @param dataDirectory the path to the data directory; must not be null. | ||
* @param cause the cause of the exception; must not be null. | ||
*/ | ||
public PiperInitializationException(Path dataDirectory, Throwable cause) { | ||
super("Failed to initialize Piper engine with data directory: " + dataDirectory, cause); | ||
} | ||
} |
40 changes: 40 additions & 0 deletions
40
audio/src/main/java/dev/masonroot/audio/exceptions/VoskInitializationException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package dev.masonroot.audio.exceptions; | ||
|
||
import java.nio.file.Path; | ||
|
||
/** | ||
* Exception thrown when the Vosk STT engine fails to initialize with the specified model path. | ||
* | ||
* <p>This exception is used to indicate issues during the initialization of the Vosk speech | ||
* recognition engine, such as when the model path is invalid or the model cannot be loaded. | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>This exception extends {@code SttEngineInitialization} to provide more specific error | ||
* information related to Vosk initialization. | ||
* </ul> | ||
*/ | ||
public final class VoskInitializationException extends EngineInitializationException { | ||
/** | ||
* Constructs a new {@code VoskInitializationException} with the specified model path and cause. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To provide a specific error message related to Vosk initialization failures. | ||
* </ul> | ||
* | ||
* <p><b>Notes:</b> | ||
* | ||
* <ul> | ||
* <li>The {@code @NonNull} annotation indicates that the parameter should not be null. | ||
* </ul> | ||
* | ||
* @param modelPath the path to the Vosk model; must not be null. | ||
* @param cause the cause of the exception; must not be null. | ||
*/ | ||
public VoskInitializationException(Path modelPath, Throwable cause) { | ||
super("Failed to initialize Vosk engine with model path: " + modelPath, cause); | ||
} | ||
} |
11 changes: 11 additions & 0 deletions
11
audio/src/main/java/dev/masonroot/audio/exceptions/package-info.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/** | ||
* This package contains custom exception classes for the audio module. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To handle specific error conditions related to audio processing. | ||
* <li>To provide meaningful error messages and facilitate debugging. | ||
* </ul> | ||
*/ | ||
package dev.masonroot.audio.exceptions; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/** | ||
* This package contains classes and interfaces for handling audio input and output operations in | ||
* the Nora application. | ||
* | ||
* <p><b>Why:</b> | ||
* | ||
* <ul> | ||
* <li>To facilitate audio input and output by managing microphone and speaker lines. | ||
* <li>To provide a thread-safe interface for reading and writing audio data. | ||
* </ul> | ||
*/ | ||
package dev.masonroot.audio; |
Oops, something went wrong.