Refactor audio message to Dialogue Utterance #384

Open · wants to merge 19 commits into master
6 changes: 3 additions & 3 deletions angel-docker-build.sh
@@ -19,7 +19,7 @@ Build the PTG ANGEL system docker container images.

Options:
-h | --help Display this message.
--force Force image building regardless of workspace hygiene.f
-f | --force Force image building regardless of workspace hygiene.
"
}

@@ -32,7 +32,7 @@ do
usage
exit 0
;;
--force)
-f|--force)
log "Forcing build regardless of workspace hygiene."
shift
FORCE_BUILD=1
@@ -113,4 +113,4 @@ get_docker_compose_cmd DC_CMD
--env-file "$SCRIPT_DIR"/docker/.env \
-f "$SCRIPT_DIR"/docker/docker-compose.yml \
--profile build-only \
build "$@"
build "${dc_forward_params[@]}" "$@"
7 changes: 7 additions & 0 deletions docker/.env
@@ -32,3 +32,10 @@ RMW_IMPLEMENTATION=rmw_cyclonedds_cpp

# This must specify the network interface for CycloneDDS to use.
CYCLONE_DDS_INTERFACE=lo

# Starting with the docker compose plugin (v2), the whole compose file will be
# validated, even for services not being run. This provides a valid "default"
# path to cause validation to succeed. This variable should be overridden when
# attempting to actually run a service that makes use of this variable.
# Path considered relative to where the docker-compose file is located.
XAUTH_FILEPATH=../.container_xauth/.placeholder
1 change: 1 addition & 0 deletions ros/angel_msgs/CMakeLists.txt
@@ -28,6 +28,7 @@ set( message_files
msg/AruiObject3d.msg
msg/AruiUpdate.msg
msg/AruiUserNotification.msg
msg/DialogueUtterance.msg
msg/EyeGazeData.msg
msg/HandJointPose.msg
msg/HandJointPosesUpdate.msg
24 changes: 24 additions & 0 deletions ros/angel_msgs/msg/DialogueUtterance.msg
@@ -0,0 +1,24 @@
#
# Dialogue Utterance with additional information about the environmental state
# and user model.
#

# The header primarily encapsulates when this message was emitted.
# The time component may be used as an identifier for this utterance and
# any classifications derived from it.
std_msgs/Header header

# Speech-to-text of the user utterance we have interpreted
string utterance_text

# Below are optional fields

# Canonical user intent that has been interpreted. "Canonical" in this context
# means that this string may be used as an identifier of this type of user
# intent.
string intent
# Confidence of the intent classification. Should be in the range [0, 1],
# where 1.0 means absolute confidence.
float64 intent_confidence_score

# Emotion classification.
string emotion
# Confidence of the emotion classification. Should be in the range [0, 1],
# where 1.0 means absolute confidence.
float64 emotion_confidence_score
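
Since ROS 2 string and float64 fields default to "" and 0.0, a producer only needs to populate the fields it knows. A minimal rclpy sketch of constructing and publishing this message (node and topic names here are illustrative, not part of this PR):

from rclpy.node import Node

from angel_msgs.msg import DialogueUtterance


class UtterancePublisher(Node):
    """Hypothetical node demonstrating DialogueUtterance construction."""

    def __init__(self):
        super().__init__("utterance_publisher")
        self._pub = self.create_publisher(DialogueUtterance, "utterances", 1)

    def publish_text(self, text: str) -> None:
        msg = DialogueUtterance()
        msg.header.frame_id = "ASR"
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.utterance_text = text
        # Optional intent/emotion fields keep their defaults until a
        # downstream classifier fills them in.
        self._pub.publish(msg)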
33 changes: 20 additions & 13 deletions ros/angel_system_nodes/angel_system_nodes/audio/asr.py
@@ -11,7 +11,7 @@
from rclpy.node import Node
import simpleaudio as sa

from angel_msgs.msg import HeadsetAudioData, Utterance
from angel_msgs.msg import HeadsetAudioData, DialogueUtterance
from angel_utils import make_default_main


@@ -106,7 +106,9 @@ def __init__(self):
self.subscription = self.create_subscription(
HeadsetAudioData, self._audio_topic, self.listener_callback, 1
)
self._publisher = self.create_publisher(Utterance, self._utterances_topic, 1)
self._publisher = self.create_publisher(
DialogueUtterance, self._utterances_topic, 1
)

self.audio_stream = []
self.t = threading.Thread()
@@ -203,17 +205,22 @@ def asr_server_request_thread(self, audio_data, num_channels, sample_rate):
if response:
response_text = json.loads(response.text)["text"]
self.log.info("Complete ASR text is:\n" + f'"{response_text}"')
if self._is_sentence_tokenize_mode:
for sentence in sent_tokenize(response_text):
utterance_msg = Utterance()
utterance_msg.value = sentence
self.log.info("Publishing message: " + f'"{sentence}"')
self._publisher.publish(utterance_msg)
else:
utterance_msg = Utterance()
utterance_msg.value = response_text
self.log.info("Publishing message: " + f'"{response_text}"')
self._publisher.publish(utterance_msg)
self._publish_response(response_text, self._is_sentence_tokenize_mode)

def _publish_response(self, response_text: str, tokenize_sentences: bool):
if tokenize_sentences:
for sentence in sent_tokenize(response_text):
self._publisher.publish(self._construct_dialogue_utterance(sentence))
else:
self._publisher.publish(self._construct_dialogue_utterance(response_text))

def _construct_dialogue_utterance(self, msg_text: str) -> DialogueUtterance:
msg = DialogueUtterance()
msg.header.frame_id = "ASR"
msg.header.stamp = self.get_clock().now().to_msg()
msg.utterance_text = msg_text
self.log.info("Publishing message: " + f'"{msg_text}"')
return msg


main = make_default_main(ASR)
24 changes: 24 additions & 0 deletions ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py
@@ -0,0 +1,24 @@
from angel_msgs.msg import DialogueUtterance
from builtin_interfaces.msg import Time


def copy_dialogue_utterance(
    msg: DialogueUtterance, node_name: str, copy_time: Time
) -> DialogueUtterance:
    """
    Copy the fields of an incoming DialogueUtterance into a new message
    stamped for publication by the named node.
    """
    copy_msg = DialogueUtterance()
    copy_msg.header.frame_id = node_name
    copy_msg.utterance_text = msg.utterance_text

    # Assign new time for publication.
    copy_msg.header.stamp = copy_time

    # Copy over intent classification information if present.
    if msg.intent:
        copy_msg.intent = msg.intent
        copy_msg.intent_confidence_score = msg.intent_confidence_score

    # Copy over emotion classification information if present.
    if msg.emotion:
        copy_msg.emotion = msg.emotion
        copy_msg.emotion_confidence_score = msg.emotion_confidence_score

    return copy_msg
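
A sketch of the intended call pattern for this helper, mirroring its use in the emotion node below (the intent-node specifics here are hypothetical):

# Hypothetical use inside a downstream classifier node: republish the
# utterance under this node's name, preserving upstream fields, and
# overwrite only the fields this node is responsible for.
pub_msg = dialogue_utterance_processing.copy_dialogue_utterance(
    msg,
    node_name="Intent Detection",
    copy_time=self.get_clock().now().to_msg(),
)
pub_msg.intent = classification
pub_msg.intent_confidence_score = confidence_score
self._publication.publish(pub_msg)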
@@ -4,14 +4,14 @@
import threading
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from angel_msgs.msg import InterpretedAudioUserEmotion, InterpretedAudioUserIntent
from angel_msgs.msg import DialogueUtterance
from angel_utils import declare_and_get_parameters
from angel_utils import make_default_main
from angel_system_nodes.audio import dialogue_utterance_processing


IN_EXPECT_USER_INTENT_TOPIC = "expect_user_intent_topic"
IN_INTERP_USER_INTENT_TOPIC = "interp_user_intent_topic"
OUT_INTERP_USER_EMOTION_TOPIC = "user_emotion_topic"
IN_TOPIC = "input_topic"
OUT_TOPIC = "user_emotion_topic"

# Currently supported emotions. This is tied with the emotions
# output to VaderSentiment (https://github.com/cjhutto/vaderSentiment) and
@@ -26,8 +26,8 @@

class BaseEmotionDetector(Node):
"""
As of Q22023, emotion detection is derived via VaderSentiment
(https://github.com/cjhutto/vaderSentiment).
This is the base emotion detection node that other emotion detection nodes
should inherit from.
"""

def __init__(self):
@@ -38,32 +38,22 @@ def __init__(self):
param_values = declare_and_get_parameters(
self,
[
(IN_EXPECT_USER_INTENT_TOPIC,),
(IN_INTERP_USER_INTENT_TOPIC,),
(OUT_INTERP_USER_EMOTION_TOPIC,),
(IN_TOPIC,),
(OUT_TOPIC,),
],
)

self._in_expect_uintent_topic = param_values[IN_EXPECT_USER_INTENT_TOPIC]
self._in_interp_uintent_topic = param_values[IN_INTERP_USER_INTENT_TOPIC]
self._out_interp_uemotion_topic = param_values[OUT_INTERP_USER_EMOTION_TOPIC]
self._in_topic = param_values[IN_TOPIC]
self._out_topic = param_values[OUT_TOPIC]

# Handle subscription/publication topics.
self.expect_uintent_subscription = self.create_subscription(
InterpretedAudioUserIntent,
self._in_expect_uintent_topic,
self.intent_detection_callback,
self._subscription = self.create_subscription(
DialogueUtterance,
self._in_topic,
self.emotion_detection_callback,
1,
)
self.interp_uintent_subscription = self.create_subscription(
InterpretedAudioUserIntent,
self._in_interp_uintent_topic,
self.intent_detection_callback,
1,
)
self._interp_emo_publisher = self.create_publisher(
InterpretedAudioUserEmotion, self._out_interp_uemotion_topic, 1
)
self._publication = self.create_publisher(DialogueUtterance, self._out_topic, 1)

self.message_queue = queue.Queue()
self.handler_thread = threading.Thread(target=self.process_message_queue)
@@ -95,14 +85,14 @@ def _get_vader_sentiment_analysis(self, utterance: str):
)
return (classification, confidence)

def get_inference(self, msg):
def get_inference(self, msg: DialogueUtterance):
"""
Abstract away the different model inference calls depending on the
node's configure model mode.
"""
return self._get_vader_sentiment_analysis(msg.utterance_text)

def intent_detection_callback(self, msg):
def emotion_detection_callback(self, msg: DialogueUtterance):
"""
This is the main ROS node listener callback loop that will process
all messages received via subscribed topics.
@@ -119,29 +109,29 @@ def process_message_queue(self):
while True:
msg = self.message_queue.get()
self.log.debug(f'Processing message:\n\n"{msg.utterance_text}"')
classification, confidence_score = self.get_inference(msg)
self.publish_detected_emotion(
msg.utterance_text, classification, confidence_score
)

def publish_detected_emotion(
self, utterance: str, classification: str, confidence_score: float
):
self.process_message(msg)

def process_message(self, msg: DialogueUtterance):
"""
Handles message publishing for an utterance with a detected emotion classification.
"""
emotion_msg = InterpretedAudioUserEmotion()
emotion_msg.header.frame_id = "Emotion Detection"
emotion_msg.header.stamp = self.get_clock().now().to_msg()
emotion_msg.utterance_text = utterance
emotion_msg.user_emotion = classification
emotion_msg.confidence = confidence_score
self._interp_emo_publisher.publish(emotion_msg)
colored_utterance = colored(utterance, "light_blue")
colored_emotion = colored(classification, "light_green")
classification, confidence_score = self.get_inference(msg)
pub_msg = dialogue_utterance_processing.copy_dialogue_utterance(
msg,
node_name="Emotion Detection",
copy_time=self.get_clock().now().to_msg(),
)
# Overwrite the user emotion with the latest classification information.
pub_msg.emotion = classification
pub_msg.emotion_confidence_score = confidence_score
self._publication.publish(pub_msg)

# Log emotion detection information.
colored_utterance = colored(pub_msg.utterance_text, "light_blue")
colored_emotion = colored(pub_msg.emotion, "light_green")
self.log.info(
f'Publishing {{"{colored_emotion}": {confidence_score}}} '
+ f'to {self._out_interp_uemotion_topic} for:\n>>> "{colored_utterance}"'
+ f'to {self._out_topic} for:\n>>> "{colored_utterance}"'
)

def _apply_filter(self, msg):
Expand All @@ -150,10 +140,6 @@ def _apply_filter(self, msg):
none if the message should be filtered out. Else, return the incoming
msg if it can be included.
"""
# if msg.user_intent.lower() == "user inquiry":
# return msg
# else:
# return None
return msg
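
The commented-out block removed above hints at how this hook is meant to be used; a subclass reinstating intent-based filtering might look like the following sketch (not part of this PR):

class InquiryOnlyEmotionDetector(BaseEmotionDetector):
    """Hypothetical subclass that only processes user-inquiry utterances."""

    def _apply_filter(self, msg):
        # Pass through only utterances whose interpreted intent is a user
        # inquiry; everything else is dropped.
        if msg.intent and msg.intent.lower() == "user inquiry":
            return msg
        return None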


@@ -8,7 +8,7 @@
BaseEmotionDetector,
LABEL_MAPPINGS,
)
from angel_utils import make_default_main
from angel_utils import declare_and_get_parameters, make_default_main

openai.organization = os.getenv("OPENAI_ORG_ID")
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -23,12 +23,22 @@
{"utterance": "We're doing great and I'm learning a lot!", "label": "positive"},
]

PARAM_TIMEOUT = "timeout"


class GptEmotionDetector(BaseEmotionDetector):
def __init__(self):
super().__init__()
self.log = self.get_logger()

param_values = declare_and_get_parameters(
self,
[
(PARAM_TIMEOUT, 600),
],
)
self.timeout = param_values[PARAM_TIMEOUT]

# This node additionally includes fields for interacting with OpenAI
# via LangChain.
if not os.getenv("OPENAI_API_KEY"):