From 583b54c854838ac6799d5e0d3450f8a342c094df Mon Sep 17 00:00:00 2001 From: Naman Tyagi Date: Sun, 6 Oct 2024 20:20:43 +0530 Subject: [PATCH 1/4] Fix grammar, typos, and consistency in CREDITS.md (#4229) --- CREDITS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CREDITS.md b/CREDITS.md index 2bc97704a735..873742b7e011 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -2,7 +2,7 @@ ## Contributors -We would like to thank all the [contributors](https://github.com/All-Hands-AI/OpenHands/graphs/contributors) who have helped make OpenHands possible. Your dedication and hard work are greatly appreciated. +We would like to thank all the [contributors](https://github.com/All-Hands-AI/OpenHands/graphs/contributors) who have helped make OpenHands possible. We greatly appreciate your dedication and hard work. ## Open Source Projects @@ -10,7 +10,7 @@ OpenHands includes and adapts the following open source projects. We are gratefu #### [SWE Agent](https://github.com/princeton-nlp/swe-agent) - License: MIT License - - Description: Adapted for use in OpenHands's agenthub + - Description: Adapted for use in OpenHands's agent hub #### [Aider](https://github.com/paul-gauthier/aider) - License: Apache License 2.0 From e3450bb8c93f66cec63f4df6e7975e09ca0ef0b4 Mon Sep 17 00:00:00 2001 From: mamoodi Date: Sun, 6 Oct 2024 13:48:05 -0400 Subject: [PATCH 2/4] Update README to installation guides for system requirements (#4232) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 62645d8d1a98..8c3cb4678762 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [ The easiest way to run OpenHands is in Docker. You can change `WORKSPACE_BASE` below to point OpenHands to existing code that you'd like to modify. 
-See the [Getting Started](https://docs.all-hands.dev/modules/usage/getting-started) guide for +See the [Installation](https://docs.all-hands.dev/modules/usage/installation) guide for system requirements and more information. ```bash @@ -65,7 +65,7 @@ You'll need a model provider and API key. One option that works well: [Claude 3. You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode). -Visit [Getting Started](https://docs.all-hands.dev/modules/usage/getting-started) for more information and setup instructions. +Visit [Installation](https://docs.all-hands.dev/modules/usage/installation) for more information and setup instructions. If you want to modify the OpenHands source code, check out [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md). From 09243eba070d6c5b4849c51acbb11b4c6236013a Mon Sep 17 00:00:00 2001 From: mamoodi Date: Sun, 6 Oct 2024 13:48:21 -0400 Subject: [PATCH 3/4] Small changes to getting started (#4233) --- docs/modules/usage/getting-started.mdx | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/modules/usage/getting-started.mdx b/docs/modules/usage/getting-started.mdx index 37d11c8090c5..d2cebc5c2de7 100644 --- a/docs/modules/usage/getting-started.mdx +++ b/docs/modules/usage/getting-started.mdx @@ -9,28 +9,30 @@ engineering tasks without any guidance. So it's important to get a feel for what does well, and where it might need some help. ## Hello World + The first thing you might want to try is a simple "hello world" example. This can be more complicated than it sounds! Try prompting the agent with: > Please write a bash script hello.sh that prints "hello world!" 
-You should see that the agent not only writes the script--it sets the correct +You should see that the agent not only writes the script but also sets the correct permissions and runs the script to check the output. You can continue prompting the agent to refine your code. This is a great way to -work with agents--start simple, and iterate. +work with agents. Start simple, and iterate. > Please modify hello.sh so that it accepts a name as the first argument, but defaults to "world" -You can also work in any language you need--though the agent might need to spend some +You can also work in any language you need, though the agent might need to spend some time setting up its environment! > Please convert hello.sh to a Ruby script, and run it -## Building from scratch -Agents do exceptionally well at "greenfield" tasks--tasks where they don't need -any context about an existing codebase, and they can just start from scratch. +## Building From Scratch + +Agents do exceptionally well at "greenfield" tasks (tasks where they don't need +any context about an existing codebase and can just start from scratch). It's best to start with a simple task, and then iterate on it. It's also best to be as specific as possible about what you want, what the tech stack should be, etc. @@ -51,7 +53,8 @@ You can ask the agent to commit and push for you: > Please commit the changes and push them to a new branch called "feature/due-dates" -## Adding new code +## Adding New Code + OpenHands can also do a great job adding new code to an existing code base. For example, you can ask OpenHands to add a new GitHub action to your project @@ -70,6 +73,7 @@ and more accurately. And it'll cost you fewer tokens! > directory. It should use the existing Widget component. ## Refactoring + OpenHands does great at refactoring existing code, especially in small chunks. You probably don't want to try rearchitecting your whole codebase, but breaking up long files and functions, renaming variables, etc. 
tend to work very well. @@ -81,6 +85,7 @@ long files and functions, renaming variables, etc. tend to work very well. > Please break ./api/routes.js into separate files for each route ## Bug Fixes + OpenHands can also help you track down and fix bugs in your code. But, as any developer knows, bug fixing can be extremely tricky, and often OpenHands will need more context. It helps if you've diagnosed the bug, but want OpenHands to figure out the logic. @@ -95,6 +100,7 @@ You can ask the agent to write a new test, and then iterate until it fixes the b > The `hello` function crashes on the empty string. Please write a test that reproduces this bug, then fix the code so it passes. ## More + OpenHands is capable of helping out on just about any coding task. But it takes some practice to get the most out of it. Remember to: * Keep your tasks small From 6b1f23a20ad43522048fdcb7872edd8eb0cdb4ac Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 7 Oct 2024 04:03:13 +0200 Subject: [PATCH 4/4] Fix browsing actions to be more robust (#4226) --- agenthub/browsing_agent/response_parser.py | 27 +++++++++-- tests/unit/test_browsing_agent_parser.py | 54 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 tests/unit/test_browsing_agent_parser.py diff --git a/agenthub/browsing_agent/response_parser.py b/agenthub/browsing_agent/response_parser.py index a5cc3fd0543b..6ececb9482d8 100644 --- a/agenthub/browsing_agent/response_parser.py +++ b/agenthub/browsing_agent/response_parser.py @@ -72,17 +72,36 @@ def check_condition(self, action_str: str) -> bool: return True def parse(self, action_str: str) -> Action: - thought = action_str.split('```')[0].strip() - action_str = action_str.split('```')[1].strip() + # parse the action string into browser_actions and thought + # the LLM can return only one string, or both + + # when both are returned, it looks like this: + ### Based on the current state of the page and the goal of finding out the president 
of the USA, the next action should involve searching for information related to the president. + ### To achieve this, we can navigate to a reliable source such as a search engine or a specific website that provides information about the current president of the USA. + ### Here is an example of a valid action to achieve this: + ### ``` + ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/' + # in practice, BrowsingResponseParser.parse_response also added )``` to the end of the string + + # when the LLM returns only one string, it looks like this: + ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/') + # and parse_response added )``` to the end of the string + parts = action_str.split('```') + browser_actions = ( + parts[1].strip() if parts[1].strip() != '' else parts[0].strip() + ) + thought = parts[0].strip() if parts[1].strip() != '' else '' + + # if the LLM wants to talk to the user, we extract the message msg_content = '' - for sub_action in action_str.split('\n'): + for sub_action in browser_actions.split('\n'): if 'send_msg_to_user(' in sub_action: tree = ast.parse(sub_action) args = tree.body[0].value.args # type: ignore msg_content = args[0].value return BrowseInteractiveAction( - browser_actions=action_str, + browser_actions=browser_actions, thought=thought, browsergym_send_msg_to_user=msg_content, ) diff --git a/tests/unit/test_browsing_agent_parser.py b/tests/unit/test_browsing_agent_parser.py new file mode 100644 index 000000000000..bc1372611c2d --- /dev/null +++ b/tests/unit/test_browsing_agent_parser.py @@ -0,0 +1,54 @@ +import pytest + +from agenthub.browsing_agent.response_parser import ( + BrowseInteractiveAction, + BrowsingResponseParser, +) + + +@pytest.mark.parametrize( + 'action_str, expected', + [ + ("click('81'", "click('81')```"), + ( + '"We need to search the internet\n```goto("google.com")', + '"We need to search the internet\n```goto("google.com"))```', + ), + ("```click('81'", "```click('81')```"), 
+ ("click('81')", "click('81'))```"), + ], +) +def test_parse_response(action_str: str, expected: str) -> None: + # BrowsingResponseParser.parse_response + parser = BrowsingResponseParser() + response = {'choices': [{'message': {'content': action_str}}]} + result = parser.parse_response(response) + assert result == expected + + +@pytest.mark.parametrize( + 'action_str, expected_browser_actions, expected_thought, expected_msg_content', + [ + ("click('81')```", "click('81')", '', ''), + ("```click('81')```", "click('81')", '', ''), + ( + "We need to perform a click\n```click('81')", + "click('81')", + 'We need to perform a click', + '', + ), + ], +) +def test_parse_action( + action_str: str, + expected_browser_actions: str, + expected_thought: str, + expected_msg_content: str, +) -> None: + # BrowsingResponseParser.parse_action + parser = BrowsingResponseParser() + action = parser.parse_action(action_str) + assert isinstance(action, BrowseInteractiveAction) + assert action.browser_actions == expected_browser_actions + assert action.thought == expected_thought + assert action.browsergym_send_msg_to_user == expected_msg_content