Skip to content

Commit

Permalink
Merge pull request NVIDIA#764 from NVIDIA/feat/migration-tool-sample-…
Browse files Browse the repository at this point in the history
…converstaion-syntax-conversion

Feat/migration tool sample conversation syntax conversion
  • Loading branch information
cparisien authored Sep 18, 2024
2 parents 5cbb2b6 + 2a88156 commit 33195ff
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 1 deletion.
127 changes: 127 additions & 0 deletions nemoguardrails/cli/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,8 @@ def _process_config_files(config_files_to_process: List[str]) -> int:
Args:
config_files_to_process (List[str]): The list of config files to process.
Returns:
int: The total number of config files changed.
"""
total_config_files_changed = 0

Expand All @@ -857,6 +859,9 @@ def _process_config_files(config_files_to_process: List[str]) -> int:
# set colang version to 2.x
_set_colang_version(version="2.x", file_path=file_path)

# Process the sample_conversation section
_process_sample_conversation_in_config(file_path)

return total_config_files_changed


Expand Down Expand Up @@ -928,3 +933,125 @@ def _remove_files_from_path(path, filenames: list[str]):
file_path = os.path.join(path, filename)
if os.path.exists(file_path):
os.remove(file_path)


def convert_sample_conversation_syntax(lines: List[str]) -> List[str]:
    """Converts the sample_conversation section from the old format to the new format.

    The old format pairs every turn with a follow-up line::

        user "Hello there!"
          express greeting
        bot express greeting
          "Hello! How can I assist you today?"

    and is rewritten as::

        user action: user said "Hello there!"
        user intent: user express greeting
        bot intent: bot express greeting
        bot action: bot say "Hello! How can I assist you today?"

    Lines that already carry one of the new-format prefixes, ``user ...``
    lines that are not of the form ``user "message"``, blank lines and any
    other lines are passed through unchanged, so no content is dropped and
    running the conversion on already-converted text is a no-op.

    Args:
        lines (List[str]): The lines of the sample_conversation to convert.

    Returns:
        List[str]: The new lines of the sample_conversation after conversion.
            Every returned line ends with a newline character.
    """
    # Prefixes produced by this function; a line that already starts with one
    # of them must not be converted a second time.
    converted_prefixes = (
        "user action:",
        "user intent:",
        "bot intent:",
        "bot action:",
    )

    new_lines = []
    i = 0
    while i < len(lines):
        line = lines[i].rstrip("\n")

        if line.startswith(converted_prefixes):
            new_lines.append(line + "\n")
            i += 1
            continue

        # process 'user' lines
        if line.startswith("user "):
            # Check if line matches 'user "message"'
            m = re.match(r'user\s+"(.*)"', line)
            if m is None:
                # Unknown shape: keep the line instead of silently dropping it.
                new_lines.append(line + "\n")
                i += 1
                continue

            new_lines.append(f'user action: user said "{m.group(1)}"\n')
            if i + 1 >= len(lines):
                i += 1
                continue

            # The line following the user message is taken as the intent.
            intent_line = lines[i + 1].strip()
            if intent_line:
                # Include 'user' prefix in the intent
                new_lines.append(f"user intent: user {intent_line}\n")
            i += 2
        elif line.startswith("bot "):
            # Check whether line is 'bot intent'
            m = re.match(r"bot\s+(.*)", line)
            if m is None:
                new_lines.append(line + "\n")
                i += 1
                continue

            # include 'bot' prefix in the intent
            new_lines.append(f"bot intent: bot {m.group(1)}\n")
            i += 1

            # The next line is consumed only when it is a quoted message;
            # otherwise it is left for the next loop iteration.
            if i < len(lines):
                m2 = re.match(r'"(.*)"', lines[i].strip())
                if m2:
                    new_lines.append(f'bot action: bot say "{m2.group(1)}"\n')
                    i += 1
        else:
            # blank lines and all other lines remain as is
            new_lines.append(line + "\n")
            i += 1
    return new_lines


def _process_sample_conversation_in_config(file_path: str):
    """Processes the sample_conversation section in the config file.

    Finds the first ``sample_conversation: |`` line, collects the lines after
    it that are blank or indented deeper than that line, converts them with
    ``convert_sample_conversation_syntax`` and writes them back using the
    indentation of the first non-blank line of the section.

    The file is left untouched when it has no such section, when the section
    contains no non-blank line, or when the conversion changes nothing.

    Args:
        file_path (str): The path to the config file.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find 'sample_conversation:' line (literal block form only)
    sample_conv_line_idx = None
    for idx, line in enumerate(lines):
        if re.match(r"^\s*sample_conversation:\s*\|", line):
            sample_conv_line_idx = idx
            break
    if sample_conv_line_idx is None:
        return  # No sample_conversation in file

    # get the base indentation
    header = lines[sample_conv_line_idx]
    base_indent = len(header) - len(header.lstrip())
    sample_conv_indent = None

    # Walk forward until the first non-blank line that is not indented deeper
    # than the 'sample_conversation:' key; blank lines belong to the section.
    sample_conv_end_idx = sample_conv_line_idx + 1
    while sample_conv_end_idx < len(lines):
        line = lines[sample_conv_end_idx]
        if line.strip():
            line_indent = len(line) - len(line.lstrip())
            if line_indent <= base_indent:
                # end of sample conversations lines
                break
            if sample_conv_indent is None:
                sample_conv_indent = line_indent
        sample_conv_end_idx += 1

    if sample_conv_indent is None:
        # Section is empty or holds only blank lines: nothing to convert, and
        # there is no indentation to restore.
        return

    sample_lines = lines[sample_conv_line_idx + 1 : sample_conv_end_idx]
    stripped_sample_lines = [line[sample_conv_indent:] for line in sample_lines]
    new_sample_lines = convert_sample_conversation_syntax(stripped_sample_lines)

    # revert the indentation; blank lines are not padded so that no trailing
    # whitespace is introduced
    indent = " " * sample_conv_indent
    indented_new_sample_lines = [
        indent + line if line.strip() else line for line in new_sample_lines
    ]
    if indented_new_sample_lines == sample_lines:
        return  # Already converted; avoid a needless rewrite

    lines[sample_conv_line_idx + 1 : sample_conv_end_idx] = indented_new_sample_lines
    # Write back the modified lines
    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(lines)
155 changes: 154 additions & 1 deletion tests/test_cli_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

import pytest

from nemoguardrails.cli.migration import convert_colang_2alpha_syntax
from nemoguardrails.cli.migration import (
convert_colang_1_syntax,
convert_colang_2alpha_syntax,
)


class TestColang2AlphaSyntaxConversion:
Expand Down Expand Up @@ -117,3 +120,153 @@ def test_convert_flow_examples(self):
output_lines = textwrap.dedent(output_1).strip().split("\n")

assert convert_colang_2alpha_syntax(input_lines) == output_lines


class TestColang1SyntaxConversion:
    """Checks ``convert_colang_1_syntax`` on individual constructs and small flows.

    Each test passes a list of input lines to the converter and compares the
    returned list with the expected converted lines.
    """

    def test_define_flow_conversion(self):
        input_lines = ["define flow express greeting"]
        expected_output = ["flow express greeting"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_define_subflow_conversion(self):
        input_lines = ["define subflow my_subflow"]
        expected_output = ["flow my_subflow"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_execute_to_await_and_pascal_case_action(self):
        input_lines = ["execute some_action"]
        expected_output = ["await SomeAction"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_stop_to_abort(self):
        input_lines = ["stop"]
        expected_output = ["abort"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_anonymous_flow_revised(self):
        input_lines = ["flow", "user said hello"]
        # because the flow is anonymous and only 'flow' is given, it will be
        # converted to 'flow said hello' based on the first message
        expected_output = ["flow said hello", "user said hello"]
        output = convert_colang_1_syntax(input_lines)
        # strip newline characters from the strings in the output list
        output = [line.rstrip("\n") for line in output]
        assert output == expected_output

    def test_global_variable_assignment(self):
        input_lines = ["$variable = value"]
        expected_output = ["global $variable\n$variable = value"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_variable_assignment_in_await(self):
        input_lines = ["$result = await some_action"]
        expected_output = ["$result = await SomeAction"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_bot_say_conversion(self):
        input_lines = ["define bot", '"Hello!"', '"How can I help you?"']
        expected_output = [
            "flow bot",
            'bot say "Hello!"',
            'or bot say "How can I help you?"',
        ]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_user_said_conversion(self):
        input_lines = ["define user", '"I need assistance."', '"Can you help me?"']
        expected_output = [
            "flow user",
            'user said "I need assistance."',
            'or user said "Can you help me?"',
        ]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_create_event_to_send(self):
        input_lines = [" create event user_asked_question"]
        expected_output = [" send user_asked_question"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_config_variable_replacement(self):
        # TODO(Rdinu): Need to see if this conversion is correct
        input_lines = ["$config.setting = true"]
        expected_output = [
            "global $system.config.setting\n$system.config.setting = true"
        ]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_flow_with_special_characters(self):
        input_lines = ["define flow my-flow's_test"]
        expected_output = ["flow my flow s_test"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_ellipsis_variable_assignment(self):
        input_lines = ["# User's name", "$name = ...", "await greet_user"]
        # The comment line preceding the `...` assignment is expected to be
        # appended to the assignment as a quoted instruction.
        expected_output = [
            "# User's name",
            'global $name\n$name = ... "User\'s name"',
            "await GreetUserAction",
        ]
        assert convert_colang_1_syntax(input_lines) == expected_output

    @pytest.mark.skip("not implemented conversion")
    def test_complex_conversion(self):
        # TODO: add bot $response to bot say $response conversion
        input_script = """
        define flow greeting_flow
            when user express greeting
                $response = execute generate_greeting
                bot $response
        """
        expected_output_script = """
        flow greeting_flow
            when user express greeting
                $response = await GenerateGreetingAction
                bot say $response
        """
        input_lines = textwrap.dedent(input_script).strip().split("\n")
        expected_output = textwrap.dedent(expected_output_script).strip().split("\n")

        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_flow_with_execute_and_stop(self):
        input_lines = [
            "define flow sample_flow",
            ' when user "Cancel"',
            " execute cancel_operation",
            " stop",
        ]
        expected_output = [
            "flow sample_flow",
            ' when user "Cancel"',
            " await CancelOperationAction",
            " abort",
        ]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_await_camelcase_conversion(self):
        input_lines = ["await sample_action"]
        expected_output = ["await SampleAction"]
        assert convert_colang_1_syntax(input_lines) == expected_output

    def test_nested_flow_conversion(self):
        input_script = """
        define flow outer_flow
            when condition_met
                define subflow inner_flow
                    execute inner_action
        """
        expected_output_script = """
        flow outer_flow
            when condition_met
                flow inner_flow
                    await InnerAction
        """
        input_lines = textwrap.dedent(input_script).strip().split("\n")
        expected_output = textwrap.dedent(expected_output_script).strip().split("\n")
        assert convert_colang_1_syntax(input_lines) == expected_output

0 comments on commit 33195ff

Please sign in to comment.