Merge pull request NVIDIA#764 from NVIDIA/feat/migration-tool-sample-…

…converstaion-syntax-conversion Feat/migration tool sample converstaion syntax conversion
OnScale · Sep 18, 2024 · 33195ff · 33195ff
2 parents 5cbb2b6 + 2a88156
commit 33195ff
Show file tree

Hide file tree

Showing 2 changed files with 281 additions and 1 deletion.
diff --git a/nemoguardrails/cli/migration.py b/nemoguardrails/cli/migration.py
@@ -837,6 +837,8 @@ def _process_config_files(config_files_to_process: List[str]) -> int:
     Args:
         config_files_to_process (List[str]): The list of config files to process.
 
+    Returns:
+        int: The total number of config files changed.
     """
     total_config_files_changed = 0
 
@@ -857,6 +859,9 @@ def _process_config_files(config_files_to_process: List[str]) -> int:
         # set colang version to 2.x
         _set_colang_version(version="2.x", file_path=file_path)
 
+        # Process the sample_conversation section
+        _process_sample_conversation_in_config(file_path)
+
     return total_config_files_changed
 
 
@@ -928,3 +933,125 @@ def _remove_files_from_path(path, filenames: list[str]):
         file_path = os.path.join(path, filename)
         if os.path.exists(file_path):
             os.remove(file_path)
+
+
+def convert_sample_conversation_syntax(lines: List[str]) -> List[str]:
+    """Converts the sample_conversation section from the old format to the new format.
+
+    The old format describes each turn on two lines::
+
+        user "Hello there!"
+          express greeting
+        bot express greeting
+          "Hello! How can I assist you today?"
+
+    and is rewritten as::
+
+        user action: user said "Hello there!"
+        user intent: user express greeting
+        bot intent: bot express greeting
+        bot action: bot say "Hello! How can I assist you today?"
+
+    Lines that are not recognised as part of a turn (blank lines, ``user``
+    lines without a quoted message, anything else) are kept unchanged, and
+    lines that are already in the new format are left alone so that running
+    the conversion twice yields the same result.
+
+    Args:
+        lines (List[str]): The lines of the sample_conversation to convert,
+            with the block indentation already removed so that turn lines
+            start at column 0.
+
+    Returns:
+        List[str]: The new lines of the sample_conversation after conversion.
+            Every returned line ends with a newline.
+    """
+    user_message_re = re.compile(r'user\s+"(.*)"')
+    bot_intent_re = re.compile(r"bot\s+(.*)")
+    quoted_re = re.compile(r'"(.*)"')
+    # Prefixes produced by this conversion; used to detect already converted lines.
+    converted_prefixes = (
+        "user action:",
+        "user intent:",
+        "bot intent:",
+        "bot action:",
+    )
+    # Prefixes that start a new turn in the old format.
+    turn_prefixes = ("user ", "bot ")
+
+    new_lines = []
+    total = len(lines)
+    i = 0
+    while i < total:
+        line = lines[i].rstrip("\n")
+        i += 1  # from here on, `i` points at the line following `line`
+
+        # Already converted: keep as is instead of dropping or re-prefixing it.
+        if line.startswith(converted_prefixes):
+            new_lines.append(line + "\n")
+            continue
+
+        if line.startswith("user "):
+            m = user_message_re.match(line)
+            if m is None:
+                # Not of the form 'user "message"': keep the line rather
+                # than silently losing it.
+                new_lines.append(line + "\n")
+                continue
+            new_lines.append(f'user action: user said "{m.group(1)}"\n')
+            # The intent is expected on the next line. Do not consume that
+            # line when it is blank or when it starts a new turn itself.
+            if i < total:
+                candidate = lines[i]
+                if candidate.strip() and not candidate.startswith(turn_prefixes):
+                    # Include 'user' prefix in the intent
+                    new_lines.append(f"user intent: user {candidate.strip()}\n")
+                    i += 1
+            continue
+
+        if line.startswith("bot "):
+            # The pattern always matches a line that starts with "bot ".
+            intent = bot_intent_re.match(line).group(1)
+            # Include 'bot' prefix in the intent
+            new_lines.append(f"bot intent: bot {intent}\n")
+            # The quoted bot message, if any, is on the next line.
+            if i < total:
+                m = quoted_re.match(lines[i].strip())
+                if m:
+                    new_lines.append(f'bot action: bot say "{m.group(1)}"\n')
+                    i += 1
+            continue
+
+        # Blank lines and any other lines remain as is.
+        new_lines.append(line + "\n")
+    return new_lines
+
+
+def _process_sample_conversation_in_config(file_path: str) -> None:
+    """Processes the sample_conversation section in the config file.
+
+    Looks for the first ``sample_conversation: |`` line, collects the block
+    that follows it, converts the block with
+    ``convert_sample_conversation_syntax`` and writes the file back in place.
+    The file is left untouched when it has no such section, when the section
+    has no content, or when the conversion does not change anything.
+
+    Args:
+        file_path (str): The path to the config file.
+    """
+    with open(file_path, "r") as f:
+        lines = f.readlines()
+
+    # Find 'sample_conversation:' line
+    sample_conv_line_idx = next(
+        (
+            idx
+            for idx, line in enumerate(lines)
+            if re.match(r"^\s*sample_conversation:\s*\|", line)
+        ),
+        None,
+    )
+    if sample_conv_line_idx is None:
+        return  # No sample_conversation in file
+
+    # get the base indentation (that of the 'sample_conversation:' key)
+    header = lines[sample_conv_line_idx]
+    base_indent = len(header) - len(header.lstrip())
+
+    # Find the end of the block. Its indentation is taken from the first
+    # non-blank line; blank lines never terminate the block.
+    sample_conv_indent = None
+    sample_conv_end_idx = sample_conv_line_idx + 1
+    while sample_conv_end_idx < len(lines):
+        line = lines[sample_conv_end_idx]
+        if line.strip():
+            line_indent = len(line) - len(line.lstrip())
+            if line_indent <= base_indent:
+                break  # end of sample conversation lines
+            if sample_conv_indent is None:
+                sample_conv_indent = line_indent
+            elif line_indent < sample_conv_indent:
+                # Less indented than the block content: slicing off the
+                # block indentation would cut into the text, so stop here.
+                break
+        sample_conv_end_idx += 1
+
+    if sample_conv_indent is None:
+        return  # The section has no content (empty or only blank lines)
+
+    sample_lines = lines[sample_conv_line_idx + 1 : sample_conv_end_idx]
+    # Remove the block indentation; blank lines are normalised to "\n".
+    stripped_sample_lines = [
+        line[sample_conv_indent:] if line.strip() else "\n" for line in sample_lines
+    ]
+    new_sample_lines = convert_sample_conversation_syntax(stripped_sample_lines)
+    # revert the indentation, without padding blank lines with spaces
+    indentation = " " * sample_conv_indent
+    indented_new_sample_lines = [
+        indentation + line if line.strip() else "\n" for line in new_sample_lines
+    ]
+    if indented_new_sample_lines == sample_lines:
+        return  # Nothing changed, no need to rewrite the file
+
+    lines[sample_conv_line_idx + 1 : sample_conv_end_idx] = indented_new_sample_lines
+    # Write back the modified lines
+    with open(file_path, "w") as f:
+        f.writelines(lines)
diff --git a/tests/test_cli_migration.py b/tests/test_cli_migration.py
@@ -17,7 +17,10 @@
 
 import pytest
 
-from nemoguardrails.cli.migration import convert_colang_2alpha_syntax
+from nemoguardrails.cli.migration import (
+    convert_colang_1_syntax,
+    convert_colang_2alpha_syntax,
+)
 
 
 class TestColang2AlphaSyntaxConversion:
@@ -117,3 +120,153 @@ def test_convert_flow_examples(self):
         output_lines = textwrap.dedent(output_1).strip().split("\n")
 
         assert convert_colang_2alpha_syntax(input_lines) == output_lines
+
+
+class TestColang1SyntaxConversion:
+    """Tests for ``convert_colang_1_syntax``.
+
+    Each test feeds a list of input lines to the converter and compares the
+    returned list with the expected converted lines.
+    """
+
+    def test_define_flow_conversion(self):
+        """``define flow`` becomes ``flow``."""
+        input_lines = ["define flow express greeting"]
+        expected_output = ["flow express greeting"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_define_subflow_conversion(self):
+        """``define subflow`` becomes ``flow``."""
+        input_lines = ["define subflow my_subflow"]
+        expected_output = ["flow my_subflow"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_execute_to_await_and_pascal_case_action(self):
+        """``execute`` becomes ``await`` and the action name is PascalCased."""
+        input_lines = ["execute some_action"]
+        expected_output = ["await SomeAction"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_stop_to_abort(self):
+        """``stop`` becomes ``abort``."""
+        input_lines = ["stop"]
+        expected_output = ["abort"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_anonymous_flow_revised(self):
+        """An anonymous ``flow`` is named after the line that follows it."""
+        input_lines = ["flow", "user said hello"]
+        # because the flow is anonymous and only 'flow' is given, it will be
+        # converted to 'flow said hello' based on the first message
+        expected_output = ["flow said hello", "user said hello"]
+        output = convert_colang_1_syntax(input_lines)
+        # strip newline characters from the strings in the output list
+        output = [line.rstrip("\n") for line in output]
+        assert output == expected_output
+
+    def test_global_variable_assignment(self):
+        """A variable assignment is preceded by a ``global`` declaration."""
+        input_lines = ["$variable = value"]
+        expected_output = ["global $variable\n$variable = value"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_variable_assignment_in_await(self):
+        """The action name in ``$var = await ...`` is PascalCased."""
+        input_lines = ["$result = await some_action"]
+        expected_output = ["$result = await SomeAction"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_bot_say_conversion(self):
+        """Messages under ``define bot`` become ``bot say`` alternatives."""
+        input_lines = ["define bot", '"Hello!"', '"How can I help you?"']
+        expected_output = [
+            "flow bot",
+            'bot say "Hello!"',
+            'or bot say "How can I help you?"',
+        ]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_user_said_conversion(self):
+        """Messages under ``define user`` become ``user said`` alternatives."""
+        input_lines = ["define user", '"I need assistance."', '"Can you help me?"']
+        expected_output = [
+            "flow user",
+            'user said "I need assistance."',
+            'or user said "Can you help me?"',
+        ]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_create_event_to_send(self):
+        """``create event`` becomes ``send``; indentation is preserved."""
+        input_lines = ["    create event user_asked_question"]
+        expected_output = ["    send user_asked_question"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_config_variable_replacement(self):
+        """``$config.`` is rewritten to ``$system.config.``."""
+        # TODO(Rdinu): Need to see if this conversion is correct
+        input_lines = ["$config.setting = true"]
+        expected_output = [
+            "global $system.config.setting\n$system.config.setting = true"
+        ]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_flow_with_special_characters(self):
+        """``-`` and ``'`` in a flow name are replaced by spaces."""
+        input_lines = ["define flow my-flow's_test"]
+        expected_output = ["flow my flow s_test"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_ellipsis_variable_assignment(self):
+        """The comment above a ``$var = ...`` line is appended as a quoted string."""
+        input_lines = ["# User's name", "$name = ...", "await greet_user"]
+        expected_output = [
+            "# User's name",
+            'global $name\n$name = ... "User\'s name"',
+            "await GreetUserAction",
+        ]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    @pytest.mark.skip("not implemented conversion")
+    def test_complex_conversion(self):
+        """Skipped: ``bot $response`` to ``bot say $response`` is not implemented."""
+        # TODO: add bot $response to bot say $response conversion
+        input_script = """
+        define flow greeting_flow
+            when user express greeting
+                $response = execute generate_greeting
+                bot $response
+        """
+        expected_output_script = """
+        flow greeting_flow
+            when user express greeting
+                $response = await GenerateGreetingAction
+                bot say $response
+        """
+        input_lines = textwrap.dedent(input_script).strip().split("\n")
+        expected_output = textwrap.dedent(expected_output_script).strip().split("\n")
+
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_flow_with_execute_and_stop(self):
+        """``execute`` and ``stop`` are converted inside an indented flow body."""
+        input_lines = [
+            "define flow sample_flow",
+            '    when user "Cancel"',
+            "        execute cancel_operation",
+            "        stop",
+        ]
+        expected_output = [
+            "flow sample_flow",
+            '    when user "Cancel"',
+            "        await CancelOperationAction",
+            "        abort",
+        ]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_await_camelcase_conversion(self):
+        """The action name after ``await`` is PascalCased."""
+        input_lines = ["await sample_action"]
+        expected_output = ["await SampleAction"]
+        assert convert_colang_1_syntax(input_lines) == expected_output
+
+    def test_nested_flow_conversion(self):
+        """A ``define subflow`` nested in a flow keeps its indentation."""
+        input_script = """
+        define flow outer_flow
+            when condition_met
+                define subflow inner_flow
+                    execute inner_action
+        """
+        expected_output_script = """
+        flow outer_flow
+            when condition_met
+                flow inner_flow
+                    await InnerAction
+        """
+        input_lines = textwrap.dedent(input_script).strip().split("\n")
+        expected_output = textwrap.dedent(expected_output_script).strip().split("\n")
+        assert convert_colang_1_syntax(input_lines) == expected_output