Merge pull request OpenInterpreter#9 from KillianLucas/killian/dev-test

Fix Python interpreter edge cases

Former-commit-id: a0071e2
Former-commit-id: bf60079613f619de472c3b3a6d1c41b19d701a3b
Former-commit-id: 300a69b8f77004dbc80cb4276574c7ee570dc6e6 [formerly 6f30ef514b307063ad388c096f6919eb419fcdb1]
Former-commit-id: 04fbb4c0924cd15ecafe0278ca02a3f675a0352b
joshuavial · Aug 13, 2023 · a856ac0 · a856ac0
2 parents b3f55c1 + 19838aa
commit a856ac0
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 102 deletions.
diff --git a/interpreter/cli.py b/interpreter/cli.py
@@ -21,7 +21,8 @@ def cli(interpreter):
                       '--local',
                       action='store_true',
                       help='run fully local with llama-2')
-  parser.add_argument('--debug',
+  parser.add_argument('-d',
+                      '--debug',
                       action='store_true',
                       help='debug mode. prints extra information')
   args = parser.parse_args()

diff --git a/interpreter/code_interpreter.py b/interpreter/code_interpreter.py
@@ -1,4 +1,6 @@
 import subprocess
+import webbrowser
+import tempfile
 import threading
 import traceback
 import platform
@@ -9,7 +11,19 @@
 import os
 import re
 
-# Mapping of languages to their start and print commands
+
+def run_html(html_content):
+    # Create a temporary HTML file with the content
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
+        f.write(html_content.encode())
+
+    # Open the HTML file with the default web browser
+    webbrowser.open('file://' + os.path.realpath(f.name))
+
+    return f"Saved to {os.path.realpath(f.name)} and opened with the user's default web browser."
+
+
+# Mapping of languages to their start, run, and print commands
 language_map = {
   "python": {
     # Python is run from this interpreter with sys.executable
@@ -32,6 +46,10 @@
     # (We'll prepend "osascript -e" every time, not once at the start, so we want an empty shell)
     "start_cmd": os.environ.get('SHELL', '/bin/zsh'),
     "print_cmd": 'log "{}"'
+  },
+  "html": {
+    "open_subrocess": False,
+    "run_function": run_html,
   }
 }
 
@@ -64,7 +82,8 @@ def start_process(self):
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
-                                 text=True)
+                                 text=True,
+                                 bufsize=0)
 
     # Start watching ^ its `stdout` and `stderr` streams
     threading.Thread(target=self.save_and_display_stream,
@@ -105,8 +124,11 @@ def run(self):
         return message
     """
 
+    # Should we keep a subprocess open? True by default
+    open_subrocess = language_map[self.language].get("open_subrocess", True)
+
     # Start the subprocess if it hasn't been started
-    if not self.proc:
+    if not self.proc and open_subrocess:
       try:
         self.start_process()
       except:
@@ -127,41 +149,41 @@ def run(self):
     self.output = ""
 
     # Use the print_cmd for the selected language
-    self.print_cmd = language_map[self.language]["print_cmd"]
+    self.print_cmd = language_map[self.language].get("print_cmd")
     code = self.code
 
     # Add print commands that tell us what the active line is
-    code = self.add_active_line_prints(code)
-
-    # If it's Python, we also need to prepare it for `python -i`
-    if self.language == "python":
-
-      # Normalize code by parsing then unparsing it
+    if self.print_cmd:
       try:
-        code = prepare_for_python_interactive(code)
+        code = self.add_active_line_prints(code)
       except:
         # If this failed, it means the code didn't compile
         # This traceback will be our output.
 
         traceback_string = traceback.format_exc()
         self.output = traceback_string
         self.update_active_block()
-
+  
         # Before you return, wait for the display to catch up?
         # (I'm not sure why this works)
         time.sleep(0.1)
-
+  
         return self.output
-
-      code = fix_code_indentation(code)
+
+    if self.language == "python":
+      # This lets us stop execution when error happens (which is not default -i behavior)
+      # And solves a bunch of indentation problems-- if everything's indented, -i treats it as one block
+      code = wrap_in_try_except(code)
 
     # Remove any whitespace lines, as this will break indented blocks
+    # (are we sure about this? test this)
     code_lines = code.split("\n")
     code_lines = [c for c in code_lines if c.strip() != ""]
     code = "\n".join(code_lines)
 
     # Add end command (we'll be listening for this so we know when it ends)
-    code += "\n\n" + self.print_cmd.format('END_OF_EXECUTION')
+    if self.print_cmd:
+      code += "\n\n" + self.print_cmd.format('END_OF_EXECUTION')
 
     # Applescript-specific processing
     if self.language == "applescript":
@@ -171,13 +193,18 @@ def run(self):
       code = '"' + code + '"'
       # Prepend start command
       code = "osascript -e " + code
-
+      
     # Debug
     if self.debug_mode:
       print("Running code:")
       print(code)
       print("---")
 
+    # HTML-specific processing (and running)
+    if self.language == "html":
+      output = language_map["html"]["run_function"](code)
+      return output
+
     # Reset self.done so we can .wait() for it
     self.done = threading.Event()
     self.done.clear()
@@ -280,6 +307,12 @@ def save_and_display_stream(self, stream):
         # Remove trailing ">"s
         line = re.sub(r'^\s*(>\s*)+', '', line)
 
+      # Python's interactive REPL outputs a million things
+      # So we clean it up:
+      if self.language == "python":
+        if re.match(r'^(\s*>>>\s*|\s*\.\.\.\s*)', line):
+          continue
+
       # Check if it's a message we added (like ACTIVE_LINE)
       # Or if we should save it to self.output
       if line.startswith("ACTIVE_LINE:"):
@@ -295,20 +328,6 @@ def save_and_display_stream(self, stream):
 
       self.update_active_block()
 
-def fix_code_indentation(code):
-  lines = code.split("\n")
-  fixed_lines = []
-  was_indented = False
-  for line in lines:
-    current_indent = len(line) - len(line.lstrip())
-    if current_indent == 0 and was_indented:
-      fixed_lines.append('')  # Add an empty line after an indented block
-    fixed_lines.append(line)
-    was_indented = current_indent > 0
-
-  return "\n".join(fixed_lines)
-
-
 def truncate_output(data):
 
   # In the future, this will come from a config file
@@ -348,10 +367,16 @@ def insert_print_statement(self, line_number):
     def process_body(self, body):
         """Processes a block of statements, adding print calls."""
         new_body = []
+
+        # In case it's not iterable:
+        if not isinstance(body, list):
+            body = [body]
+
         for sub_node in body:
             if hasattr(sub_node, 'lineno'):
                 new_body.append(self.insert_print_statement(sub_node.lineno))
             new_body.append(sub_node)
+
         return new_body
 
     def visit(self, node):
@@ -384,29 +409,37 @@ def add_active_line_prints_to_python(code):
     new_tree = transformer.visit(tree)
     return ast.unparse(new_tree)
 
-def prepare_for_python_interactive(code):
-    """
-    Adjusts code formatting for the python -i flag. It adds newlines based 
-    on whitespace to make code work in interactive mode.
-    """
-
-    def get_indentation(line):
-        """Returns the number of leading spaces in a line, treating 4 spaces as one level of indentation."""
-        return len(line) - len(line.lstrip())
-
-    lines = code.split('\n')
-    adjusted_code = []
-
-    previous_indentation = 0
-
-    for line in lines:
-        current_indentation = get_indentation(line)
-
-        if current_indentation < previous_indentation:
-            if not (line.strip().startswith("except:") or line.strip().startswith("else:") or line.strip().startswith("elif:") or line.strip().startswith("finally:")):
-              adjusted_code.append('')  # end of block
+def wrap_in_try_except(code):
+    # Add import traceback
+    code = "import traceback\n" + code
+
+    # Parse the input code into an AST
+    parsed_code = ast.parse(code)
+
+    # Wrap the entire code's AST in a single try-except block
+    try_except = ast.Try(
+        body=parsed_code.body,
+        handlers=[
+            ast.ExceptHandler(
+                type=ast.Name(id="Exception", ctx=ast.Load()),
+                name=None,
+                body=[
+                    ast.Expr(
+                        value=ast.Call(
+                            func=ast.Attribute(value=ast.Name(id="traceback", ctx=ast.Load()), attr="print_exc", ctx=ast.Load()),
+                            args=[],
+                            keywords=[]
+                        )
+                    ),
+                ]
+            )
+        ],
+        orelse=[],
+        finalbody=[]
+    )
 
-        adjusted_code.append(line)
-        previous_indentation = current_indentation
+    # Assign the try-except block as the new body
+    parsed_code.body = [try_except]
 
-    return '\n'.join(adjusted_code)
+    # Convert the modified AST back to source code
+    return ast.unparse(parsed_code)
diff --git a/interpreter/interpreter.py b/interpreter/interpreter.py
@@ -28,7 +28,7 @@
         "type": "string",
         "description":
         "The programming language.",
-        "enum": ["python", "shell", "applescript", "javascript"]
+        "enum": ["python", "shell", "applescript", "javascript", "html"]
       },
       "code": {
         "type": "string",

diff --git a/interpreter/utils.py b/interpreter/utils.py
@@ -32,67 +32,46 @@ def escape_newlines_in_json_string_values(s):
     return ''.join(result)
 
 def parse_partial_json(s):
-    """
-    Tries to parse a string as JSON and if it fails, attempts to 'close' any open JSON structures.
-
-    Parameters:
-    s (str): The string to parse as JSON.
-
-    Returns:
-    json: The parsed JSON if successful, or None if it fails even after attempting to close open structures.
-    """
-
-    # First, try to parse the string as-is. If it's valid JSON, we'll return it directly.
-    try:
-        return json.loads(s)
-    except json.JSONDecodeError:
-        pass  # The string is not valid JSON. We'll try to handle this case below.
-
-    # First, make sure newlines inside double quotes are escaped properly (a common error in GPT function calls)
-    s = escape_newlines_in_json_string_values(s)
-
-    # Initialize a stack to keep track of open braces and brackets.
+    # Initialize a stack to keep track of open braces, brackets, and strings.
     stack = []
-
-    # Initialize a flag to keep track of whether we're currently inside a string.
     is_inside_string = False
+    escaped = False
 
     # Process each character in the string one at a time.
     for char in s:
-
-        # Handle quotes, which denote the start or end of a string in JSON.
-        if char == '"':
-
-            if stack and stack[-1] == '\\': # <- This is a single backslash, even though it looks like two!
-
-                # This quote is escaped, so it doesn't affect whether we're inside a string.
-                stack.pop()
+        if is_inside_string:
+            if char == '"' and not escaped:
+                is_inside_string = False
+            elif char == '\\':
+                escaped = not escaped
             else:
-                # This quote is not escaped, so it toggles whether we're inside a string.
-                is_inside_string = not is_inside_string
-
-        # If we're not inside a string, we need to handle braces and brackets.
-        elif not is_inside_string:
-            if char == '{' or char == '[':
-                # This character opens a new structure, so add it to the stack.
-                stack.append(char)
+                escaped = False
+        else:
+            if char == '"':
+                is_inside_string = True
+                escaped = False
+            elif char == '{':
+                stack.append('}')
+            elif char == '[':
+                stack.append(']')
             elif char == '}' or char == ']':
-                # This character closes a structure, so remove the most recently opened structure from the stack.
-                if stack:
+                if stack and stack[-1] == char:
                     stack.pop()
+                else:
+                    # Mismatched closing character; the input is malformed.
+                    return None
 
     # If we're still inside a string at the end of processing, we need to close the string.
     if is_inside_string:
         s += '"'
 
     # Close any remaining open structures in the reverse order that they were opened.
-    while stack:
-        open_char = stack.pop()
-        s += '}' if open_char == '{' else ']'
+    for closing_char in reversed(stack):
+        s += closing_char
 
-    # Attempt to parse the string as JSON again now that we've closed all open structures.
+    # Attempt to parse the modified string as JSON.
     try:
         return json.loads(s)
     except json.JSONDecodeError:
         # If we still can't parse the string as JSON, return None to indicate failure.
-        return None
+        return None