Add Intent Detection Node #296

Merged (1 commit) on Oct 10, 2023
164 changes: 164 additions & 0 deletions ros/angel_system_nodes/angel_system_nodes/base_intent_detector.py
@@ -0,0 +1,164 @@
import queue
import rclpy
from rclpy.node import Node
from termcolor import colored
import threading

from angel_msgs.msg import InterpretedAudioUserIntent, Utterance
from angel_utils import declare_and_get_parameters

NEXT_STEP_KEYPHRASES = ["skip", "next", "next step"]
PREV_STEP_KEYPHRASES = ["previous", "previous step", "last step", "go back"]
QUESTION_KEYPHRASES = ["question"]
OVERRIDE_KEYPHRASES = ["angel", "angel system"]

# TODO(derekahmed): Please figure out how to keep this sync-ed with
# config/angel_system_cmds/user_intent_to_sys_cmd_v1.yaml.
# Please refer to labels defined in
# https://docs.google.com/document/d/1uuvSL5de3LVM9c0tKpRKYazDxckffRHf7IAcabSw9UA .
INTENT_LABELS = ["next_step", "prev_step", "inquiry", "other"]

UTTERANCES_TOPIC = "utterances_topic"
PARAM_EXPECT_USER_INTENT_TOPIC = "expect_user_intent_topic"
PARAM_INTERP_USER_INTENT_TOPIC = "interp_user_intent_topic"


class BaseIntentDetector(Node):
    def __init__(self):
        super().__init__(self.__class__.__name__)
        self.log = self.get_logger()

        # Handle parameterization.
        param_values = declare_and_get_parameters(
            self,
            [
                (UTTERANCES_TOPIC,),
                (PARAM_EXPECT_USER_INTENT_TOPIC,),
                (PARAM_INTERP_USER_INTENT_TOPIC,),
            ],
        )
        self._utterances_topic = param_values[UTTERANCES_TOPIC]
        self._expect_uintent_topic = param_values[PARAM_EXPECT_USER_INTENT_TOPIC]
        self._interp_uintent_topic = param_values[PARAM_INTERP_USER_INTENT_TOPIC]

        # Handle subscription/publication topics.
        self.subscription = self.create_subscription(
            Utterance, self._utterances_topic, self.utterance_callback, 1
        )
        self._expected_publisher = self.create_publisher(
            InterpretedAudioUserIntent, self._expect_uintent_topic, 1
        )
        self._interp_publisher = self.create_publisher(
            InterpretedAudioUserIntent, self._interp_uintent_topic, 1
        )

        self.utterance_message_queue = queue.Queue()
        self.handler_thread = threading.Thread(
            target=self.process_utterance_message_queue
        )
        self.handler_thread.start()

    def utterance_callback(self, msg):
        """
        This is the main ROS node listener callback loop that will process all
        messages received via subscribed topics.
        """
        self.log.debug(f'Received message:\n\n"{msg.value}"')
        self.utterance_message_queue.put(msg)

    def process_utterance_message_queue(self):
        """
        Constant loop to process received messages.
        """
        while True:
            msg = self.utterance_message_queue.get()
            self.log.debug(f'Processing message:\n\n"{msg.value}"')
            intent, score = self.detect_intents(msg)
            if not intent:
                continue
            self.publish_msg(msg.value, intent, score)

    def detect_intents(self, msg):
        """
        Keyphrase search for intent detection. This implementation does simple
        string matching to assign a detected label. When multiple intents are
        detected, the message is classified as the first intent, or as an
        'inquiry' if 'inquiry' is one of the classifications.
        """

        def _tiebreak_intents(intents, confidences):
            classification = intents[0]
            score = confidences[0]
            if len(intents) > 1:
                for i, intent in enumerate(intents):
                    if intent == INTENT_LABELS[2]:
                        classification, score = intent, confidences[i]
                self.log.info(
                    f'Detected multiple intents: {intents}. Selected "{classification}".'
                )
            return classification, score

        lower_utterance = msg.value.lower()
        intents = []
        confidences = []
        if self._contains_phrase(lower_utterance, NEXT_STEP_KEYPHRASES):
            intents.append(INTENT_LABELS[0])
            confidences.append(0.5)
        if self._contains_phrase(lower_utterance, PREV_STEP_KEYPHRASES):
            intents.append(INTENT_LABELS[1])
            confidences.append(0.5)
        if self._contains_phrase(lower_utterance, QUESTION_KEYPHRASES):
            intents.append(INTENT_LABELS[2])
            confidences.append(0.5)
        if not intents:
            colored_utterance = colored(msg.value, "light_blue")
            self.log.info(f'No intents detected for:\n>>> "{colored_utterance}"')
            return None, -1.0

        classification, confidence = _tiebreak_intents(intents, confidences)
        # Return the plain label so downstream consumers (e.g. the user intent
        # to system command mapping) see "next_step"/"prev_step"/"inquiry"
        # rather than a terminal-colored string; coloring happens when logging.
        return classification, confidence

    def publish_msg(self, utterance, intent, score):
        """
        Handles message publishing for an utterance with a detected intent.
        """
        intent_msg = InterpretedAudioUserIntent()
        intent_msg.header.frame_id = "Intent Detection"
        intent_msg.header.stamp = self.get_clock().now().to_msg()
        intent_msg.utterance_text = utterance
        intent_msg.user_intent = intent
        intent_msg.confidence = score
        published_topic = None
        if self._contains_phrase(utterance.lower(), OVERRIDE_KEYPHRASES):
            intent_msg.confidence = 1.0
            self._expected_publisher.publish(intent_msg)
            published_topic = PARAM_EXPECT_USER_INTENT_TOPIC
        else:
            self._interp_publisher.publish(intent_msg)
            published_topic = PARAM_INTERP_USER_INTENT_TOPIC

        colored_utterance = colored(utterance, "light_blue")
        colored_intent = colored(intent_msg.user_intent, "light_green")
        self.log.info(
            f'Publishing {{"{colored_intent}": {score}}} to {published_topic} '
            + f'for:\n>>> "{colored_utterance}"'
        )

    def _contains_phrase(self, utterance, phrases):
        for phrase in phrases:
            if phrase in utterance:
                return True
        return False


def main():
    rclpy.init()
    intent_detector = BaseIntentDetector()
    rclpy.spin(intent_detector)
    intent_detector.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
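Note: the keyphrase matching and the 'inquiry' tie-break described in the detect_intents docstring above can be exercised without ROS. Below is a minimal, standalone sketch (not part of this PR) that reuses the same keyphrase lists and labels; the sample utterances are hypothetical.

# Standalone sketch of the matching/tie-breaking behavior of
# BaseIntentDetector.detect_intents; not part of the PR.
NEXT_STEP_KEYPHRASES = ["skip", "next", "next step"]
PREV_STEP_KEYPHRASES = ["previous", "previous step", "last step", "go back"]
QUESTION_KEYPHRASES = ["question"]


def classify(utterance: str):
    lower = utterance.lower()
    intents = []
    if any(p in lower for p in NEXT_STEP_KEYPHRASES):
        intents.append("next_step")
    if any(p in lower for p in PREV_STEP_KEYPHRASES):
        intents.append("prev_step")
    if any(p in lower for p in QUESTION_KEYPHRASES):
        intents.append("inquiry")
    if not intents:
        return None
    # Tie-break: prefer 'inquiry' when several keyphrases match,
    # otherwise keep the first detected intent.
    return "inquiry" if "inquiry" in intents else intents[0]


print(classify("Next step, please."))           # next_step
print(classify("I have a question, go back."))  # inquiry (wins the tie-break)
print(classify("The sky is blue."))             # None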
100 changes: 100 additions & 0 deletions ros/angel_system_nodes/angel_system_nodes/gpt_intent_detector.py
@@ -0,0 +1,100 @@
from langchain import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
import openai
import os
import rclpy

from angel_system_nodes.base_intent_detector import BaseIntentDetector, INTENT_LABELS

openai.organization = os.getenv("OPENAI_ORG_ID")
openai.api_key = os.getenv("OPENAI_API_KEY")

# The following are few shot examples when prompting GPT.
FEW_SHOT_EXAMPLES = [
    {"utterance": "Go back to the previous step!", "label": "prev_step"},
    {"utterance": "Next step, please.", "label": "next_step"},
    {"utterance": "How should I wrap this tourniquet?", "label": "inquiry"},
    {"utterance": "The sky is blue", "label": "other"},
]


class GptIntentDetector(BaseIntentDetector):
    def __init__(self):
        super().__init__()
        self.log = self.get_logger()

        # This node additionally includes fields for interacting with OpenAI
        # via LangChain.
        # Read both credentials up front so the check below cannot hit an
        # unset attribute.
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.log.info("OPENAI_API_KEY environment variable is unset!")
        self.openai_org_id = os.getenv("OPENAI_ORG_ID")
        if not self.openai_org_id:
            self.log.info("OPENAI_ORG_ID environment variable is unset!")
        if not bool(self.openai_api_key and self.openai_org_id):
            raise ValueError("Please configure OpenAI API Keys.")
        self.chain = self._configure_langchain()

    def _configure_langchain(self):
        def _labels_list_parenthetical_str(labels):
            concat_labels = ", ".join(labels)
            return f"({concat_labels})"

        def _labels_list_str(labels):
            return ", ".join(labels[:-1]) + f" or {labels[-1]}"

        all_intents_parenthetical = _labels_list_parenthetical_str(INTENT_LABELS)
        all_intents = _labels_list_str(INTENT_LABELS)

        # Define the few shot template.
        template = (
            f"Utterance: {{utterance}}\nIntent {all_intents_parenthetical}: {{label}}"
        )
        example_prompt = PromptTemplate(
            input_variables=["utterance", "label"], template=template
        )
        prompt_instructions = f"Classify each utterance as {all_intents}.\n"
        inference_sample = (
            f"Utterance: {{utterance}}\nIntent {all_intents_parenthetical}:"
        )
        few_shot_prompt = FewShotPromptTemplate(
            examples=FEW_SHOT_EXAMPLES,
            example_prompt=example_prompt,
            prefix=prompt_instructions,
            suffix=inference_sample,
            input_variables=["utterance"],
            example_separator="\n",
        )

        # Please refer to https://github.com/hwchase17/langchain/blob/master/langchain/llms/openai.py
        openai_llm = ChatOpenAI(
            model_name="gpt-3.5-turbo",
            openai_api_key=self.openai_api_key,
            temperature=0.0,
            # Only a couple of tokens are needed for classification (labels such
            # as 'next_step' tokenize to roughly 2 tokens).
            max_tokens=2,
        )
        return LLMChain(llm=openai_llm, prompt=few_shot_prompt)

    def detect_intents(self, msg):
        """
        Detects the user intent via langchain execution of GPT.
        """
        # Pass the utterance text itself (not the full ROS message) into the
        # prompt template.
        return self.chain.run(utterance=msg.value), 0.5


def main():
    rclpy.init()
    intent_detector = GptIntentDetector()
    rclpy.spin(intent_detector)
    intent_detector.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
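Note: the few-shot prompt that _configure_langchain assembles can be rendered offline, without an API key, since prompt formatting is pure string templating. A minimal sketch (not part of this PR), assuming the pre-0.1 langchain API imported above and a hypothetical test utterance:

# Render the few-shot classification prompt locally; no OpenAI call is made.
from langchain import FewShotPromptTemplate, PromptTemplate

examples = [
    {"utterance": "Go back to the previous step!", "label": "prev_step"},
    {"utterance": "Next step, please.", "label": "next_step"},
    {"utterance": "How should I wrap this tourniquet?", "label": "inquiry"},
    {"utterance": "The sky is blue", "label": "other"},
]
example_prompt = PromptTemplate(
    input_variables=["utterance", "label"],
    template="Utterance: {utterance}\nIntent (next_step, prev_step, inquiry, other): {label}",
)
few_shot_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix="Classify each utterance as next_step, prev_step, inquiry or other.\n",
    suffix="Utterance: {utterance}\nIntent (next_step, prev_step, inquiry, other):",
    input_variables=["utterance"],
    example_separator="\n",
)
# Prints the full prompt that would be sent to GPT for this utterance.
print(few_shot_prompt.format(utterance="Can I move on to the next step?"))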
2 changes: 2 additions & 0 deletions ros/angel_system_nodes/setup.py
@@ -22,6 +22,8 @@
"video_listener = angel_system_nodes.video_subscriber:main",
"asr = angel_system_nodes.asr:main",
"voice_activity_detector = angel_system_nodes.voice_activity_detector:main",
"base_intent_detector = angel_system_nodes.base_intent_detector:main",
"gpt_intent_detector = angel_system_nodes.gpt_intent_detector:main",
"intent_detector = angel_system_nodes.intent_detector:main",
"audio_player = angel_system_nodes.audio_player:main",
"frame_publisher = angel_system_nodes.ros_publisher:main",
96 changes: 96 additions & 0 deletions tmux/eval_vocalized_intent_detection.yml
@@ -0,0 +1,96 @@
#
# Used to evaluate Intent Detection with vocal processing for a specified ROS bag of data
# This configuration should be run by itself (e.g. not in combination with
# another tmuxinator launch).
#
# NOTE: In order to query GPT, you will need to execute
# ```
# export OPENAI_API_KEY="YOUR API KEY"
# export OPENAI_ORG_ID="YOUR ORG ID"
# ```
#

name: Intent Detection with VAD
root: <%= ENV["ANGEL_WORKSPACE_DIR"] %>

# Optional tmux socket
# socket_name: foo

# Note that the pre and post options have been deprecated and will be replaced by
# project hooks.

# Project hooks

# Runs on project start, always
# on_project_start: command
on_project_start: |
  export ROS_NAMESPACE=${ROS_NAMESPACE:-/debug}
  export CONFIG_DIR=${ANGEL_WORKSPACE_DIR}/src/angel_system_nodes/configs
  export NODE_RESOURCES_DIR=${ANGEL_WORKSPACE_DIR}/src/angel_system_nodes/resource
# Run on project start, the first time
# on_project_first_start: command

# Run on project start, after the first time
# on_project_restart: command

# Run on project exit ( detaching from tmux session )
# on_project_exit: command

# Run on project stop
# on_project_stop: command

# Runs in each window and pane before window/pane specific commands. Useful for setting up interpreter versions.
# pre_window: rbenv shell 2.0.0-p247

# Pass command line options to tmux. Useful for specifying a different tmux.conf.
# tmux_options: -f ~/.tmux.mac.conf
tmux_options: -f <%= ENV["ANGEL_WORKSPACE_DIR"] %>/tmux/tmux.conf

# Change the command to call tmux. This can be used by derivatives/wrappers like byobu.
# tmux_command: byobu

# Specifies (by name or index) which window will be selected on project startup. If not set, the first window is used.
# startup_window: editor

# Specifies (by index) which pane of the specified window will be selected on project startup. If not set, the first pane is used.
# startup_pane: 1

# Controls whether the tmux session should be attached to automatically. Defaults to true.
# attach: false

windows:
  # - ros_bag_play: ros2 bag play <<PATH_TO_BAG_FILE>>
  - ros_bag_play: sleep 5; ros2 bag play /angel_workspace/ros_bags/rosbag2_2023_03_01-17_28_00_0.db3
  - vocal:
      layout: even-vertical
      panes:
        - vad: ros2 run angel_system_nodes voice_activity_detector --ros-args
            -r __ns:=${ROS_NAMESPACE}
            -p input_audio_topic:=HeadsetAudioData
            -p output_voice_activity_topic:=DetectedVoiceData
            -p vad_server_url:=http://communication.cs.columbia.edu:55667/vad
            -p vad_cadence:=3
            -p vad_margin:=0.20
            -p max_accumulation_length:=10
            -p debug_mode:=True
        - asr: ros2 run angel_system_nodes asr --ros-args
            -r __ns:=${ROS_NAMESPACE}
            -p audio_topic:=DetectedVoiceData
            -p utterances_topic:=utterances_topic
            -p asr_server_url:=http://communication.cs.columbia.edu:55667/asr
            -p asr_req_segment_duration:=1
            -p is_sentence_tokenize:=False
            -p debug_mode:=True
  - intent:
      layout: even-vertical
      panes:
        - base_intent_detection: ros2 run angel_system_nodes base_intent_detector --ros-args
            -r __ns:=${ROS_NAMESPACE}
            -p utterances_topic:=utterances_topic
            -p expect_user_intent_topic:=expect_user_intent_topic
            -p interp_user_intent_topic:=interp_user_intent_topic
        - gpt_intent_detection: ros2 run angel_system_nodes gpt_intent_detector --ros-args
            -r __ns:=${ROS_NAMESPACE}
            -p utterances_topic:=utterances_topic
            -p expect_user_intent_topic:=expect_user_intent_topic
            -p interp_user_intent_topic:=interp_user_intent_topic
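Note: to exercise only the two intent-detector panes above, without replaying a ROS bag or reaching the Columbia VAD/ASR servers, one could hand-publish a single utterance. A minimal sketch (not part of this PR), assuming the default /debug namespace set by this config and that the detectors only read the Utterance message's value field:

# Hypothetical smoke test: publish one Utterance to the topic the intent
# detectors subscribe to and let them classify it.
import rclpy
from angel_msgs.msg import Utterance


def main():
    rclpy.init()
    node = rclpy.create_node("intent_smoke_test", namespace="/debug")
    pub = node.create_publisher(Utterance, "utterances_topic", 1)
    msg = Utterance()
    msg.value = "Angel, go back to the previous step."
    # Spin briefly so the publisher can match the detector subscriptions.
    rclpy.spin_once(node, timeout_sec=1.0)
    pub.publish(msg)
    rclpy.spin_once(node, timeout_sec=1.0)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()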