Refactor audio message to Dialogue Utterance #384

Open · wants to merge 19 commits into master
6 changes: 3 additions & 3 deletions angel-docker-build.sh
@@ -19,7 +19,7 @@ Build the PTG ANGEL system docker container images.

Options:
-h | --help Display this message.
--force Force image building regardless of workspace hygiene.f
-f | --force Force image building regardless of workspace hygiene.
"
}

@@ -32,7 +32,7 @@ do
usage
exit 0
;;
--force)
-f|--force)
log "Forcing build regardless of workspace hygiene."
shift
FORCE_BUILD=1
@@ -113,4 +113,4 @@ get_docker_compose_cmd DC_CMD
--env-file "$SCRIPT_DIR"/docker/.env \
-f "$SCRIPT_DIR"/docker/docker-compose.yml \
--profile build-only \
build "$@"
build "${dc_forward_params[@]}" "$@"
7 changes: 7 additions & 0 deletions docker/.env
@@ -32,3 +32,10 @@ RMW_IMPLEMENTATION=rmw_cyclonedds_cpp

# This must specify the network interface for CycloneDDS to use.
CYCLONE_DDS_INTERFACE=lo

# Starting with the docker compose plugin (v2), the whole compose file will be
# validated, even for services not being run. This provides a valid "default"
# path to cause validation to succeed. This variable should be overridden when
# attempting to actually run a service that makes use of this variable.
# Path considered relative to where the docker-compose file is located.
XAUTH_FILEPATH=../.container_xauth/.placeholder
1 change: 1 addition & 0 deletions ros/angel_msgs/CMakeLists.txt
@@ -28,6 +28,7 @@ set( message_files
msg/AruiObject3d.msg
msg/AruiUpdate.msg
msg/AruiUserNotification.msg
msg/DialogueUtterance.msg
msg/EyeGazeData.msg
msg/HandJointPose.msg
msg/HandJointPosesUpdate.msg
24 changes: 24 additions & 0 deletions ros/angel_msgs/msg/DialogueUtterance.msg
@@ -0,0 +1,24 @@
#
# Dialogue Utterance with additional information about the environmental state
# and user model.
#

# The header primarily encapsulates when this message was emitted.
# The time component may be used as an identifier for this utterance and
# any classifications derived from it.
std_msgs/Header header

# Speech-to-text of the user utterance we have interpreted
string utterance_text

# Below are optional fields

# Canonical user intent that has been interpreted. "Canonical" in this context
# means that this string may be used as an identifier of this type of user
# intent.
string intent
# Confidence of the intent classification. Should be in the range [0, 1],
# where 1.0 means absolute confidence.
float64 intent_confidence_score

# Emotion classification.
string emotion
# Confidence of the emotion classification. Should be in the range [0, 1],
# where 1.0 means absolute confidence.
float64 emotion_confidence_score
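
Since ROS 2 string and float64 fields default to "" and 0.0, a producer only needs to populate the fields it knows. A minimal rclpy sketch of constructing and publishing this message (node and topic names here are illustrative, not part of this PR):

from rclpy.node import Node

from angel_msgs.msg import DialogueUtterance


class UtterancePublisher(Node):
    """Hypothetical node demonstrating DialogueUtterance construction."""

    def __init__(self):
        super().__init__("utterance_publisher")
        self._pub = self.create_publisher(DialogueUtterance, "utterances", 1)

    def publish_text(self, text: str) -> None:
        msg = DialogueUtterance()
        msg.header.frame_id = "ASR"
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.utterance_text = text
        # Optional intent/emotion fields keep their defaults until a
        # downstream classifier fills them in.
        self._pub.publish(msg)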
33 changes: 20 additions & 13 deletions ros/angel_system_nodes/angel_system_nodes/audio/asr.py
@@ -11,7 +11,7 @@
from rclpy.node import Node
import simpleaudio as sa

from angel_msgs.msg import HeadsetAudioData, Utterance
from angel_msgs.msg import HeadsetAudioData, DialogueUtterance
from angel_utils import make_default_main


@@ -106,7 +106,9 @@ def __init__(self):
self.subscription = self.create_subscription(
HeadsetAudioData, self._audio_topic, self.listener_callback, 1
)
self._publisher = self.create_publisher(Utterance, self._utterances_topic, 1)
self._publisher = self.create_publisher(
DialogueUtterance, self._utterances_topic, 1
)

self.audio_stream = []
self.t = threading.Thread()
@@ -203,17 +205,22 @@ def asr_server_request_thread(self, audio_data, num_channels, sample_rate):
if response:
response_text = json.loads(response.text)["text"]
self.log.info("Complete ASR text is:\n" + f'"{response_text}"')
if self._is_sentence_tokenize_mode:
for sentence in sent_tokenize(response_text):
utterance_msg = Utterance()
utterance_msg.value = sentence
self.log.info("Publishing message: " + f'"{sentence}"')
self._publisher.publish(utterance_msg)
else:
utterance_msg = Utterance()
utterance_msg.value = response_text
self.log.info("Publishing message: " + f'"{response_text}"')
self._publisher.publish(utterance_msg)
self._publish_response(response_text, self._is_sentence_tokenize_mode)

def _publish_response(self, response_text: str, tokenize_sentences: bool):
if tokenize_sentences:
for sentence in sent_tokenize(response_text):
self._publisher.publish(self._construct_dialogue_utterance(sentence))
else:
self._publisher.publish(self._construct_dialogue_utterance(response_text))

def _construct_dialogue_utterance(self, msg_text: str) -> DialogueUtterance:
msg = DialogueUtterance()
msg.header.frame_id = "ASR"
msg.header.stamp = self.get_clock().now().to_msg()
msg.utterance_text = msg_text
self.log.info("Publishing message: " + f'"{msg_text}"')
return msg


main = make_default_main(ASR)
24 changes: 24 additions & 0 deletions ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py
@@ -0,0 +1,24 @@
from angel_msgs.msg import DialogueUtterance
from builtin_interfaces.msg import Time


def copy_dialogue_utterance(
    msg: DialogueUtterance, node_name: str, copy_time: Time
) -> DialogueUtterance:
    """
    Copy the fields of an incoming DialogueUtterance into a new message
    stamped for publication by the named node.
    """
    copy_msg = DialogueUtterance()
    copy_msg.header.frame_id = node_name
    copy_msg.utterance_text = msg.utterance_text

    # Assign new time for publication.
    copy_msg.header.stamp = copy_time

    # Copy over intent classification information if present.
    if msg.intent:
        copy_msg.intent = msg.intent
        copy_msg.intent_confidence_score = msg.intent_confidence_score

    # Copy over emotion classification information if present.
    if msg.emotion:
        copy_msg.emotion = msg.emotion
        copy_msg.emotion_confidence_score = msg.emotion_confidence_score

    return copy_msg
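
A sketch of the intended call pattern for this helper, mirroring its use in the emotion node below (the intent-node specifics here are hypothetical):

# Hypothetical use inside a downstream classifier node: republish the
# utterance under this node's name, preserving upstream fields, and
# overwrite only the fields this node is responsible for.
pub_msg = dialogue_utterance_processing.copy_dialogue_utterance(
    msg,
    node_name="Intent Detection",
    copy_time=self.get_clock().now().to_msg(),
)
pub_msg.intent = classification
pub_msg.intent_confidence_score = confidence_score
self._publication.publish(pub_msg)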
@@ -4,14 +4,14 @@
import threading
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from angel_msgs.msg import InterpretedAudioUserEmotion, InterpretedAudioUserIntent
from angel_msgs.msg import DialogueUtterance
from angel_utils import declare_and_get_parameters
from angel_utils import make_default_main
from angel_system_nodes.audio import dialogue_utterance_processing


IN_EXPECT_USER_INTENT_TOPIC = "expect_user_intent_topic"
IN_INTERP_USER_INTENT_TOPIC = "interp_user_intent_topic"
OUT_INTERP_USER_EMOTION_TOPIC = "user_emotion_topic"
IN_TOPIC = "input_topic"
OUT_TOPIC = "user_emotion_topic"

# Currently supported emotions. This is tied with the emotions
# output to VaderSentiment (https://github.com/cjhutto/vaderSentiment) and
@@ -26,8 +26,8 @@

class BaseEmotionDetector(Node):
"""
As of Q22023, emotion detection is derived via VaderSentiment
(https://github.com/cjhutto/vaderSentiment).
This is the base emotion detection node that other emotion detection nodes
should inherit from.
"""

def __init__(self):
@@ -38,32 +38,22 @@ def __init__(self):
param_values = declare_and_get_parameters(
self,
[
(IN_EXPECT_USER_INTENT_TOPIC,),
(IN_INTERP_USER_INTENT_TOPIC,),
(OUT_INTERP_USER_EMOTION_TOPIC,),
(IN_TOPIC,),
(OUT_TOPIC,),
],
)

self._in_expect_uintent_topic = param_values[IN_EXPECT_USER_INTENT_TOPIC]
self._in_interp_uintent_topic = param_values[IN_INTERP_USER_INTENT_TOPIC]
self._out_interp_uemotion_topic = param_values[OUT_INTERP_USER_EMOTION_TOPIC]
self._in_topic = param_values[IN_TOPIC]
self._out_topic = param_values[OUT_TOPIC]

# Handle subscription/publication topics.
self.expect_uintent_subscription = self.create_subscription(
InterpretedAudioUserIntent,
self._in_expect_uintent_topic,
self.intent_detection_callback,
self._subscription = self.create_subscription(
DialogueUtterance,
self._in_topic,
self.emotion_detection_callback,
1,
)
self.interp_uintent_subscription = self.create_subscription(
InterpretedAudioUserIntent,
self._in_interp_uintent_topic,
self.intent_detection_callback,
1,
)
self._interp_emo_publisher = self.create_publisher(
InterpretedAudioUserEmotion, self._out_interp_uemotion_topic, 1
)
self._publication = self.create_publisher(DialogueUtterance, self._out_topic, 1)

self.message_queue = queue.Queue()
self.handler_thread = threading.Thread(target=self.process_message_queue)
@@ -95,14 +85,14 @@ def _get_vader_sentiment_analysis(self, utterance: str):
)
return (classification, confidence)

def get_inference(self, msg):
def get_inference(self, msg: DialogueUtterance):
"""
Abstract away the different model inference calls depending on the
node's configure model mode.
"""
return self._get_vader_sentiment_analysis(msg.utterance_text)

def intent_detection_callback(self, msg):
def emotion_detection_callback(self, msg: DialogueUtterance):
"""
This is the main ROS node listener callback loop that will process
all messages received via subscribed topics.
@@ -119,29 +109,29 @@ def process_message_queue(self):
while True:
msg = self.message_queue.get()
self.log.debug(f'Processing message:\n\n"{msg.utterance_text}"')
classification, confidence_score = self.get_inference(msg)
self.publish_detected_emotion(
msg.utterance_text, classification, confidence_score
)

def publish_detected_emotion(
self, utterance: str, classification: str, confidence_score: float
):
self.process_message(msg)

def process_message(self, msg: DialogueUtterance):
"""
Handles message publishing for an utterance with a detected emotion classification.
"""
emotion_msg = InterpretedAudioUserEmotion()
emotion_msg.header.frame_id = "Emotion Detection"
emotion_msg.header.stamp = self.get_clock().now().to_msg()
emotion_msg.utterance_text = utterance
emotion_msg.user_emotion = classification
emotion_msg.confidence = confidence_score
self._interp_emo_publisher.publish(emotion_msg)
colored_utterance = colored(utterance, "light_blue")
colored_emotion = colored(classification, "light_green")
classification, confidence_score = self.get_inference(msg)
pub_msg = dialogue_utterance_processing.copy_dialogue_utterance(
msg,
node_name="Emotion Detection",
copy_time=self.get_clock().now().to_msg(),
)
# Overwrite the user emotion with the latest classification information.
pub_msg.emotion = classification
pub_msg.emotion_confidence_score = confidence_score
self._publication.publish(pub_msg)

# Log emotion detection information.
colored_utterance = colored(pub_msg.utterance_text, "light_blue")
colored_emotion = colored(pub_msg.emotion, "light_green")
self.log.info(
f'Publishing {{"{colored_emotion}": {confidence_score}}} '
+ f'to {self._out_interp_uemotion_topic} for:\n>>> "{colored_utterance}"'
+ f'to {self._out_topic} for:\n>>> "{colored_utterance}"'
)

def _apply_filter(self, msg):
Expand All @@ -150,10 +140,6 @@ def _apply_filter(self, msg):
none if the message should be filtered out. Else, return the incoming
msg if it can be included.
"""
# if msg.user_intent.lower() == "user inquiry":
# return msg
# else:
# return None
return msg
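
The commented-out block removed above hints at how this hook is meant to be used; a subclass reinstating intent-based filtering might look like the following sketch (not part of this PR):

class InquiryOnlyEmotionDetector(BaseEmotionDetector):
    """Hypothetical subclass that only processes user-inquiry utterances."""

    def _apply_filter(self, msg):
        # Pass through only utterances whose interpreted intent is a user
        # inquiry; everything else is dropped.
        if msg.intent and msg.intent.lower() == "user inquiry":
            return msg
        return None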


@@ -8,7 +8,7 @@
BaseEmotionDetector,
LABEL_MAPPINGS,
)
from angel_utils import make_default_main
from angel_utils import declare_and_get_parameters, make_default_main

openai.organization = os.getenv("OPENAI_ORG_ID")
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -23,12 +23,22 @@
{"utterance": "We're doing great and I'm learning a lot!", "label": "positive"},
]

PARAM_TIMEOUT = "timeout"


class GptEmotionDetector(BaseEmotionDetector):
def __init__(self):
super().__init__()
self.log = self.get_logger()

param_values = declare_and_get_parameters(
self,
[
(PARAM_TIMEOUT, 600),
],
)
self.timeout = param_values[PARAM_TIMEOUT]

# This node additionally includes fields for interacting with OpenAI
# via LangChain.
if not os.getenv("OPENAI_API_KEY"):