Skip to content

Commit

Permalink
Merge pull request OpenInterpreter#9 from KillianLucas/killian/dev-test
Browse files Browse the repository at this point in the history
Fix Python interpreter edge cases

Former-commit-id: a0071e2
Former-commit-id: bf60079613f619de472c3b3a6d1c41b19d701a3b
Former-commit-id: 300a69b8f77004dbc80cb4276574c7ee570dc6e6 [formerly 6f30ef514b307063ad388c096f6919eb419fcdb1]
Former-commit-id: 04fbb4c0924cd15ecafe0278ca02a3f675a0352b
  • Loading branch information
KillianLucas authored Aug 13, 2023
2 parents b3f55c1 + 19838aa commit a856ac0
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 102 deletions.
3 changes: 2 additions & 1 deletion interpreter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def cli(interpreter):
'--local',
action='store_true',
help='run fully local with llama-2')
parser.add_argument('--debug',
parser.add_argument('-d',
'--debug',
action='store_true',
help='debug mode. prints extra information')
args = parser.parse_args()
Expand Down
143 changes: 88 additions & 55 deletions interpreter/code_interpreter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import subprocess
import webbrowser
import tempfile
import threading
import traceback
import platform
Expand All @@ -9,7 +11,19 @@
import os
import re

# Mapping of languages to their start and print commands

def run_html(html_content):
    """Save HTML to a temporary file and open it in the default web browser.

    The file is created with ``delete=False`` and a ``.html`` suffix, so it
    is still on disk when the browser loads it.

    Args:
        html_content (str): The HTML markup to write to the file.

    Returns:
        str: A message naming the real path of the saved file.
    """
    # Local import so this function does not depend on a file-level import
    # that may not exist.
    from pathlib import Path

    # Create a temporary HTML file with the content. Leaving the `with`
    # block closes (and flushes) the file before the browser reads it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as f:
        f.write(html_content.encode())

    saved_path = os.path.realpath(f.name)

    # Build the URL with as_uri() rather than 'file://' + path: plain
    # concatenation is malformed for Windows drive paths and does not
    # percent-encode characters such as spaces or '#'.
    webbrowser.open(Path(saved_path).as_uri())

    return f"Saved to {saved_path} and opened with the user's default web browser."


# Mapping of languages to their start, run, and print commands
language_map = {
"python": {
# Python is run from this interpreter with sys.executable
Expand All @@ -32,6 +46,10 @@
# (We'll prepend "osascript -e" every time, not once at the start, so we want an empty shell)
"start_cmd": os.environ.get('SHELL', '/bin/zsh'),
"print_cmd": 'log "{}"'
},
"html": {
"open_subrocess": False,
"run_function": run_html,
}
}

Expand Down Expand Up @@ -64,7 +82,8 @@ def start_process(self):
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True)
text=True,
bufsize=0)

# Start watching ^ its `stdout` and `stderr` streams
threading.Thread(target=self.save_and_display_stream,
Expand Down Expand Up @@ -105,8 +124,11 @@ def run(self):
return message
"""

# Should we keep a subprocess open? True by default
open_subrocess = language_map[self.language].get("open_subrocess", True)

# Start the subprocess if it hasn't been started
if not self.proc:
if not self.proc and open_subrocess:
try:
self.start_process()
except:
Expand All @@ -127,41 +149,41 @@ def run(self):
self.output = ""

# Use the print_cmd for the selected language
self.print_cmd = language_map[self.language]["print_cmd"]
self.print_cmd = language_map[self.language].get("print_cmd")
code = self.code

# Add print commands that tell us what the active line is
code = self.add_active_line_prints(code)

# If it's Python, we also need to prepare it for `python -i`
if self.language == "python":

# Normalize code by parsing then unparsing it
if self.print_cmd:
try:
code = prepare_for_python_interactive(code)
code = self.add_active_line_prints(code)
except:
# If this failed, it means the code didn't compile
# This traceback will be our output.

traceback_string = traceback.format_exc()
self.output = traceback_string
self.update_active_block()

# Before you return, wait for the display to catch up?
# (I'm not sure why this works)
time.sleep(0.1)

return self.output

code = fix_code_indentation(code)

if self.language == "python":
# This lets us stop execution when error happens (which is not default -i behavior)
# And solves a bunch of indentation problems-- if everything's indented, -i treats it as one block
code = wrap_in_try_except(code)

# Remove any whitespace lines, as this will break indented blocks
# (are we sure about this? test this)
code_lines = code.split("\n")
code_lines = [c for c in code_lines if c.strip() != ""]
code = "\n".join(code_lines)

# Add end command (we'll be listening for this so we know when it ends)
code += "\n\n" + self.print_cmd.format('END_OF_EXECUTION')
if self.print_cmd:
code += "\n\n" + self.print_cmd.format('END_OF_EXECUTION')

# Applescript-specific processing
if self.language == "applescript":
Expand All @@ -171,13 +193,18 @@ def run(self):
code = '"' + code + '"'
# Prepend start command
code = "osascript -e " + code

# Debug
if self.debug_mode:
print("Running code:")
print(code)
print("---")

# HTML-specific processing (and running)
if self.language == "html":
output = language_map["html"]["run_function"](code)
return output

# Reset self.done so we can .wait() for it
self.done = threading.Event()
self.done.clear()
Expand Down Expand Up @@ -280,6 +307,12 @@ def save_and_display_stream(self, stream):
# Remove leading ">"s (prompt markers at the start of the line)
line = re.sub(r'^\s*(>\s*)+', '', line)

# Python's interactive REPL outputs a million things
# So we clean it up:
if self.language == "python":
if re.match(r'^(\s*>>>\s*|\s*\.\.\.\s*)', line):
continue

# Check if it's a message we added (like ACTIVE_LINE)
# Or if we should save it to self.output
if line.startswith("ACTIVE_LINE:"):
Expand All @@ -295,20 +328,6 @@ def save_and_display_stream(self, stream):

self.update_active_block()

def fix_code_indentation(code):
    """Insert an empty line after every indented block in *code*.

    A line counts as indented when it has any leading whitespace. Each time
    an unindented line directly follows an indented one, an empty line is
    emitted before it.

    Args:
        code (str): Source text with lines separated by ``"\\n"``.

    Returns:
        str: The source text with the extra empty lines inserted.
    """
    output = []
    previous_was_indented = False

    for raw_line in code.split("\n"):
        is_indented = (len(raw_line) - len(raw_line.lstrip())) > 0

        # Leaving an indented block: separate it from what follows.
        if previous_was_indented and not is_indented:
            output.append('')

        output.append(raw_line)
        previous_was_indented = is_indented

    return "\n".join(output)


def truncate_output(data):

# In the future, this will come from a config file
Expand Down Expand Up @@ -348,10 +367,16 @@ def insert_print_statement(self, line_number):
def process_body(self, body):
    """Return the statements of *body* with a print call before each one.

    For every node that has a ``lineno`` attribute, the result of
    ``self.insert_print_statement(lineno)`` is placed immediately before
    the node. Nodes without ``lineno`` are passed through unchanged.

    Args:
        body: A list of nodes, or a single node (which is treated as a
            one-element list).

    Returns:
        list: The new body with the inserted print statements.
    """
    # A lone node is not iterable, so normalise it to a list first.
    statements = body if isinstance(body, list) else [body]

    instrumented = []
    for statement in statements:
        if hasattr(statement, 'lineno'):
            instrumented.append(self.insert_print_statement(statement.lineno))
        instrumented.append(statement)

    return instrumented

def visit(self, node):
Expand Down Expand Up @@ -384,29 +409,37 @@ def add_active_line_prints_to_python(code):
new_tree = transformer.visit(tree)
return ast.unparse(new_tree)

def prepare_for_python_interactive(code):
    """Adjust code formatting so it can be fed to ``python -i``.

    In interactive mode a blank line terminates the current compound
    statement. This function inserts an empty line wherever an indented
    block is followed by a new top-level statement, so the interpreter
    knows the block has ended.

    No empty line is inserted:

    * before ``except`` / ``else`` / ``elif`` / ``finally`` clauses, in any
      form (``except ValueError:``, ``elif cond:``, ...), because they
      continue the preceding statement;
    * when dedenting to a still-indented level, because a blank line there
      would end the enclosing statement too early.

    Args:
        code (str): Source text with lines separated by ``"\\n"``.

    Returns:
        str: The adjusted source text.
    """
    adjusted_code = []
    previous_indentation = 0

    for line in code.split('\n'):
        # Indentation is measured as the raw count of leading whitespace
        # characters; only "zero vs. non-zero" matters below.
        current_indentation = len(line) - len(line.lstrip())

        returned_to_top_level = current_indentation == 0 and previous_indentation > 0
        # \b keeps identifiers such as `else_value` from being mistaken for
        # a clause keyword.
        continues_statement = re.match(r'(except|else|elif|finally)\b', line.strip())

        if returned_to_top_level and not continues_statement:
            adjusted_code.append('')  # end of block

        adjusted_code.append(line)
        previous_indentation = current_indentation

    return '\n'.join(adjusted_code)


def wrap_in_try_except(code):
    """Wrap *code* in a single ``try``/``except Exception`` block.

    ``import traceback`` is prepended to the code, the whole module body is
    moved into the ``try``, and the handler calls ``traceback.print_exc()``.
    Execution therefore stops at the first error and the traceback is
    printed instead of propagating.

    Args:
        code (str): Python source to wrap.

    Returns:
        str: The wrapped source, regenerated with ``ast.unparse``.

    Raises:
        SyntaxError: If *code* cannot be parsed.
    """
    # The handler needs `traceback`, so import it as part of the code itself.
    code = "import traceback\n" + code

    # Parse the input code into an AST
    parsed_code = ast.parse(code)

    # Handler body: traceback.print_exc()
    print_traceback = ast.Expr(
        value=ast.Call(
            func=ast.Attribute(
                value=ast.Name(id="traceback", ctx=ast.Load()),
                attr="print_exc",
                ctx=ast.Load(),
            ),
            args=[],
            keywords=[],
        )
    )

    # Wrap the entire code's AST in a single try-except block
    try_except = ast.Try(
        body=parsed_code.body,
        handlers=[
            ast.ExceptHandler(
                type=ast.Name(id="Exception", ctx=ast.Load()),
                name=None,
                body=[print_traceback],
            )
        ],
        orelse=[],
        finalbody=[],
    )

    # Assign the try-except block as the new body
    parsed_code.body = [try_except]

    # Convert the modified AST back to source code
    return ast.unparse(parsed_code)
2 changes: 1 addition & 1 deletion interpreter/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"type": "string",
"description":
"The programming language.",
"enum": ["python", "shell", "applescript", "javascript"]
"enum": ["python", "shell", "applescript", "javascript", "html"]
},
"code": {
"type": "string",
Expand Down
69 changes: 24 additions & 45 deletions interpreter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,67 +32,46 @@ def escape_newlines_in_json_string_values(s):
return ''.join(result)

def parse_partial_json(s):
    """Parse *s* as JSON, closing any structures that were left open.

    The string is first parsed as-is. If that fails, it is scanned once to:

    * escape raw newlines that appear inside double-quoted strings (a
      common error in GPT function calls);
    * record which closing brace or bracket each open structure needs;
    * reject input whose closing character does not match the most
      recently opened structure.

    After the scan, an unterminated string is closed, the missing closers
    are appended innermost first, and the result is parsed again.

    Parameters:
    s (str): The string to parse as JSON.

    Returns:
    The parsed JSON value if successful, or None if the input is malformed
    or still cannot be parsed after closing open structures.
    """
    # First, try to parse the string as-is. If it's valid JSON, return it directly.
    try:
        return json.loads(s)
    except json.JSONDecodeError:
        pass  # Not valid JSON as given; try to repair it below.

    repaired = []  # characters of the repaired string
    stack = []  # closing characters still owed, innermost last
    is_inside_string = False
    escaped = False  # True when the previous character was an unescaped backslash

    # Process each character in the string one at a time.
    for char in s:
        if is_inside_string:
            if escaped:
                # This character is consumed by the preceding backslash.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                is_inside_string = False
            elif char == '\n':
                # Raw newlines are not allowed inside JSON strings.
                char = '\\n'
        elif char == '"':
            is_inside_string = True
        elif char == '{':
            stack.append('}')
        elif char == '[':
            stack.append(']')
        elif char == '}' or char == ']':
            if stack and stack[-1] == char:
                stack.pop()
            else:
                # Mismatched closing character; the input is malformed.
                return None
        repaired.append(char)

    # If we're still inside a string at the end of processing, close it.
    if is_inside_string:
        if escaped:
            # The input was cut off right after a backslash; keeping it
            # would escape the closing quote we are about to add.
            repaired.pop()
        repaired.append('"')

    # Close any remaining open structures in the reverse order that they were opened.
    repaired.extend(reversed(stack))

    # Attempt to parse the modified string as JSON.
    try:
        return json.loads(''.join(repaired))
    except json.JSONDecodeError:
        return None

0 comments on commit a856ac0

Please sign in to comment.