From 660d1d1e64c5e28e96bf9b8172cd87d1d809fd07 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sun, 2 Mar 2025 05:37:15 +0100
Subject: [PATCH 1/7] Fix argument in swe-bench grading scripts (#7046)

---
 evaluation/benchmarks/swe_bench/eval_infer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/benchmarks/swe_bench/eval_infer.py b/evaluation/benchmarks/swe_bench/eval_infer.py
index 7ba191cd4893..bdb09976efc1 100644
--- a/evaluation/benchmarks/swe_bench/eval_infer.py
+++ b/evaluation/benchmarks/swe_bench/eval_infer.py
@@ -288,7 +288,7 @@ def process_instance(
                                     'model_patch': model_patch,
                                     'instance_id': instance_id,
                                 },
-                                log_path=test_output_path,
+                                test_log_path=test_output_path,
                                 include_tests_status=True,
                             )
                             report = _report[instance_id]

From ff5d8094de702e248858c00a8ff1ba0167c9d743 Mon Sep 17 00:00:00 2001
From: mamoodi <mamoodiha@gmail.com>
Date: Sun, 2 Mar 2025 10:35:47 -0500
Subject: [PATCH 2/7] Updates to the ISSUE TRIAGE (#7043)

---
 ISSUE_TRIAGE.md | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/ISSUE_TRIAGE.md b/ISSUE_TRIAGE.md
index 0bec6484825a..9043ab8f150e 100644
--- a/ISSUE_TRIAGE.md
+++ b/ISSUE_TRIAGE.md
@@ -2,12 +2,13 @@
 These are the procedures and guidelines on how issues are triaged in this repo by the maintainers.
 
 ## General
-* Most issues must be tagged with **enhancement** or **bug**.
-* Issues may be tagged with what it relates to (**backend**, **frontend**, **agent quality**, etc.).
+* All issues must be tagged with **enhancement**, **bug** or **troubleshooting/help**.
+* Issues may be tagged with what they relate to (**agent quality**, **frontend**, **resolver**, etc.).
 
 ## Severity
 * **Low**: Minor issues or affecting single user.
 * **Medium**: Affecting multiple users.
+* **High**: High visibility issues or affecting many users.
 * **Critical**: Affecting all users or potential security issues.
 
 ## Effort
@@ -18,8 +19,14 @@ These are the procedures and guidelines on how issues are triaged in this repo b
 
 ## Not Enough Information
 * User is asked to provide more information (logs, how to reproduce, etc.) when the issue is not clear.
-* If an issue is unclear and the author does not provide more information or respond to a request, the issue may be closed as **not planned** (Usually after a week).
+* If an issue is unclear and the author does not provide more information or respond to a request,
+the issue may be closed as **not planned** (usually after a week).
 
 ## Multiple Requests/Fixes in One Issue
 * These issues will be narrowed down to one request/fix so the issue is more easily tracked and fixed.
 * Issues may be broken down into multiple issues if required.
+
+## Stale and Auto Closures
+* In order to keep a maintainable backlog, issues that have no activity within 30 days are automatically marked as **Stale**.
+* If issues marked as **Stale** continue to have no activity for 7 more days, they will automatically be closed as **not planned**.
+* Issues may be reopened by maintainers if deemed important.

From 85c0864802f1aafbf96bb541e5a12d6bc14c960b Mon Sep 17 00:00:00 2001
From: Ivan Dagelic <dagelic.ivan@gmail.com>
Date: Sun, 2 Mar 2025 17:43:38 +0100
Subject: [PATCH 3/7] chore: update daytona readme (#7053)

Signed-off-by: Ivan Dagelic <dagelic.ivan@gmail.com>
---
 openhands/runtime/impl/daytona/README.md | 42 +++++++++++++++++++++---
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/openhands/runtime/impl/daytona/README.md b/openhands/runtime/impl/daytona/README.md
index dfaa7e3dc02f..c9d71408613a 100644
--- a/openhands/runtime/impl/daytona/README.md
+++ b/openhands/runtime/impl/daytona/README.md
@@ -2,22 +2,54 @@
 
 [Daytona](https://www.daytona.io/) is a platform that provides a secure and elastic infrastructure for running AI-generated code. It provides all the necessary features for an AI Agent to interact with a codebase. It provides a Daytona SDK with official Python and TypeScript interfaces for interacting with Daytona, enabling you to programmatically manage development environments and execute code.
 
+## Quick start
+
+Get your Daytona API key from https://app.daytona.io/dashboard/keys and export it:
+
+```bash
+export DAYTONA_API_KEY="<your-api-key>"
+```
+
+Use the following command to run the latest OpenHands release locally using Docker:
+
+```bash
+bash -i <(curl -sL https://get.daytona.io/openhands)
+```
+
+
 ## Getting started
 
 1. Sign in at https://app.daytona.io/
 
 1. Generate and copy your API key
 
-1. Set the following environment variables before running the OpenHands app on your local machine or via a `docker run` command:
+1. Set the `OPENHANDS_VERSION` environment variable to the latest release's version seen in the main README.md file; as well as the `DAYTONA_API_KEY`
 
 ```bash
-    RUNTIME="daytona"
-    DAYTONA_API_KEY="<your-api-key>"
+export OPENHANDS_VERSION=<OPENHANDS_RELEASE>  # e.g. 0.27
+export DAYTONA_API_KEY=<your_api_key>
 ```
-Optionally, if you don't want your sandboxes to default to the US region, set:
+
+1. Run the following `docker` command:
+
+```bash
+docker run -it --rm --pull=always \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${OPENHANDS_VERSION}-nikolaik \
+    -e LOG_ALL_EVENTS=true \
+    -e RUNTIME=daytona \
+    -e DAYTONA_API_KEY=${DAYTONA_API_KEY} \
+    -v ~/.openhands-state:/.openhands-state \
+    -p 3000:3000 \
+    --name openhands-app \
+    docker.all-hands.dev/all-hands-ai/openhands:${OPENHANDS_VERSION}
+```
+> **Tip:** If you don't want your sandboxes to default to the US region, you can set the `DAYTONA_TARGET` environment variable to `eu`
+
+Alternatively, if you want to run the OpenHands app on your local machine using `make run` without Docker, set the following environment variables first:
 
 ```bash
-    DAYTONA_TARGET="eu"
+export RUNTIME="daytona"
+export DAYTONA_API_KEY="<your-api-key>"
 ```
 
 ## Documentation

From cf439fa89cf45a5462336a10c3dfee4ab4c0ace8 Mon Sep 17 00:00:00 2001
From: Ivan Dagelic <dagelic.ivan@gmail.com>
Date: Sun, 2 Mar 2025 20:17:35 +0100
Subject: [PATCH 4/7] chore: daytona readme quick start verbosity (#7056)

Signed-off-by: Ivan Dagelic <dagelic.ivan@gmail.com>
---
 openhands/runtime/impl/daytona/README.md | 48 ++++++++++++++++++------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/openhands/runtime/impl/daytona/README.md b/openhands/runtime/impl/daytona/README.md
index c9d71408613a..ba19ae5f88fa 100644
--- a/openhands/runtime/impl/daytona/README.md
+++ b/openhands/runtime/impl/daytona/README.md
@@ -2,35 +2,58 @@
 
 [Daytona](https://www.daytona.io/) is a platform that provides a secure and elastic infrastructure for running AI-generated code. It provides all the necessary features for an AI Agent to interact with a codebase. It provides a Daytona SDK with official Python and TypeScript interfaces for interacting with Daytona, enabling you to programmatically manage development environments and execute code.
 
-## Quick start
+## Quick Start
 
-Get your Daytona API key from https://app.daytona.io/dashboard/keys and export it:
+### Step 1: Retrieve Your Daytona API Key
+1. Visit the [Daytona Dashboard](https://app.daytona.io/dashboard/keys).
+2. Click **"Create Key"**.
+3. Enter a name for your key and confirm the creation.
+4. Once the key is generated, copy it.
 
+### Step 2: Set Your API Key as an Environment Variable
+Run the following command in your terminal, replacing `<your-api-key>` with the actual key you copied:
 ```bash
 export DAYTONA_API_KEY="<your-api-key>"
 ```
 
-Use the following command to run the latest OpenHands release locally using Docker:
+This step ensures that OpenHands can authenticate with the Daytona platform when it runs.
 
+### Step 3: Run OpenHands Locally Using Docker
+To start the latest version of OpenHands on your machine, execute the following command in your terminal:
 ```bash
 bash -i <(curl -sL https://get.daytona.io/openhands)
 ```
 
+#### What This Command Does:
+- Downloads the latest OpenHands release script.
+- Runs the script in an interactive Bash session.
+- Automatically pulls and runs the OpenHands container using Docker.
+Once executed, OpenHands should be running locally and ready for use.
 
-## Getting started
 
-1. Sign in at https://app.daytona.io/
+## Manual Initialization
 
-1. Generate and copy your API key
+### Step 1: Set the `OPENHANDS_VERSION` Environment Variable
+Run the following command in your terminal, replacing `<openhands-release>` with the latest release's version seen in the [main README.md file](https://github.com/All-Hands-AI/OpenHands?tab=readme-ov-file#-quick-start):
 
-1. Set the `OPENHANDS_VERSION` environment variable to the latest release's version seen in the main README.md file; as well as the `DAYTONA_API_KEY`
+```bash
+export OPENHANDS_VERSION="<openhands-release>"  # e.g. 0.27
+```
+
+### Step 2: Retrieve Your Daytona API Key
+1. Visit the [Daytona Dashboard](https://app.daytona.io/dashboard/keys).
+2. Click **"Create Key"**.
+3. Enter a name for your key and confirm the creation.
+4. Once the key is generated, copy it.
 
+### Step 3: Set Your API Key as an Environment Variable
+Run the following command in your terminal, replacing `<your-api-key>` with the actual key you copied:
 ```bash
-export OPENHANDS_VERSION=<OPENHANDS_RELEASE>  # e.g. 0.27
-export DAYTONA_API_KEY=<your_api_key>
+export DAYTONA_API_KEY="<your-api-key>"
 ```
 
-1. Run the following `docker` command:
+### Step 4: Run OpenHands Using Docker
+This command pulls and runs the OpenHands container using Docker. Once executed, OpenHands should be running locally and ready for use.
 
 ```bash
 docker run -it --rm --pull=always \
@@ -43,9 +66,12 @@ docker run -it --rm --pull=always \
     --name openhands-app \
     docker.all-hands.dev/all-hands-ai/openhands:${OPENHANDS_VERSION}
 ```
+
 > **Tip:** If you don't want your sandboxes to default to the US region, you can set the `DAYTONA_TARGET` environment variable to `eu`
 
-Alternatively, if you want to run the OpenHands app on your local machine using `make run` without Docker, set the following environment variables first:
+### Running OpenHands Locally Without Docker
+
+Alternatively, if you want to run the OpenHands app on your local machine using `make run` without Docker, make sure to set the following environment variables first:
 
 ```bash
 export RUNTIME="daytona"

From 62750c07e5c61d5cf8bfc9086ac2e54152398222 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sun, 2 Mar 2025 21:33:07 +0100
Subject: [PATCH 5/7] Fix GitLab CI environment variable check (issue #7050)
 (#7052)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 openhands/resolver/resolve_issue.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/resolver/resolve_issue.py b/openhands/resolver/resolve_issue.py
index 9a686815da38..a4116579a2ca 100644
--- a/openhands/resolver/resolve_issue.py
+++ b/openhands/resolver/resolve_issue.py
@@ -202,7 +202,7 @@ async def process_issue(
         timeout=300,
     )
 
-    if os.getenv('GITLAB_CI') == 'True':
+    if os.getenv('GITLAB_CI') == 'true':
         sandbox_config.local_runtime_url = os.getenv(
             'LOCAL_RUNTIME_URL', 'http://localhost'
         )

From 91ad59dc2414a34bc2ed01571937b9475cd0d5fb Mon Sep 17 00:00:00 2001
From: Graham Neubig <neubig@gmail.com>
Date: Sun, 2 Mar 2025 17:21:07 -0500
Subject: [PATCH 6/7] More explicit feedback message about how to report errors
 to developers (#7063)

---
 openhands/controller/agent_controller.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 406c4fa4fad5..fdcc829ec0fd 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -248,8 +248,9 @@ async def _step_with_exception_handling(self):
             )
             reported = RuntimeError(
                 'There was an unexpected error while running the agent. Please '
-                f'report this error to the developers. Your session ID is {self.id}. '
-                f'Error type: {e.__class__.__name__}'
+                'report this error to the developers by opening an issue at '
+                'https://github.com/All-Hands-AI/OpenHands. Your session ID is '
+                f' {self.id}. Error type: {e.__class__.__name__}'
             )
             if (
                 isinstance(e, litellm.AuthenticationError)

From 395c1ea9e3f593848c53b11dd896b60a3b8279b1 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Mon, 3 Mar 2025 00:19:25 +0100
Subject: [PATCH 7/7] [Refactor] split runtime initialization (create, connect,
 init) in cli scripts (#7036)

---
 evaluation/benchmarks/EDA/run_infer.py        |  2 +
 .../benchmarks/agent_bench/run_infer.py       |  2 +
 .../benchmarks/aider_bench/run_infer.py       |  3 +-
 evaluation/benchmarks/biocoder/run_infer.py   |  2 +
 evaluation/benchmarks/bird/run_infer.py       |  2 +
 .../browsing_delegation/run_infer.py          |  2 +
 .../benchmarks/commit0_bench/run_infer.py     |  2 +
 .../benchmarks/discoverybench/run_infer.py    |  2 +
 evaluation/benchmarks/gaia/run_infer.py       |  2 +
 evaluation/benchmarks/gorilla/run_infer.py    |  2 +
 evaluation/benchmarks/gpqa/run_infer.py       |  2 +
 .../benchmarks/humanevalfix/run_infer.py      |  2 +
 .../benchmarks/logic_reasoning/run_infer.py   |  2 +
 evaluation/benchmarks/miniwob/run_infer.py    |  2 +
 evaluation/benchmarks/mint/run_infer.py       |  2 +
 evaluation/benchmarks/ml_bench/run_infer.py   |  2 +
 .../benchmarks/scienceagentbench/run_infer.py |  2 +
 evaluation/benchmarks/swe_bench/run_infer.py  |  2 +
 .../benchmarks/the_agent_company/run_infer.py |  3 +-
 evaluation/benchmarks/toolqa/run_infer.py     |  2 +
 .../benchmarks/visualwebarena/run_infer.py    |  3 ++
 evaluation/benchmarks/webarena/run_infer.py   |  2 +
 evaluation/integration_tests/run_infer.py     |  2 +
 openhands/core/cli.py                         | 16 +++++-
 openhands/core/main.py                        | 13 ++++-
 openhands/core/setup.py                       | 50 +++++++++++++------
 26 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/evaluation/benchmarks/EDA/run_infer.py b/evaluation/benchmarks/EDA/run_infer.py
index 636a52e2bd74..f216a86ff8ca 100644
--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@@ -24,6 +24,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync
 
 game = None
 
@@ -121,6 +122,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/benchmarks/agent_bench/run_infer.py b/evaluation/benchmarks/agent_bench/run_infer.py
index 68cf2ff793ce..a78e40239548 100644
--- a/evaluation/benchmarks/agent_bench/run_infer.py
+++ b/evaluation/benchmarks/agent_bench/run_infer.py
@@ -34,6 +34,7 @@
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def get_config(
@@ -210,6 +211,7 @@ def process_instance(
     # =============================================
 
     runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     initialize_runtime(runtime, instance=instance)
 
diff --git a/evaluation/benchmarks/aider_bench/run_infer.py b/evaluation/benchmarks/aider_bench/run_infer.py
index 9c848f67b154..0d97496acd33 100644
--- a/evaluation/benchmarks/aider_bench/run_infer.py
+++ b/evaluation/benchmarks/aider_bench/run_infer.py
@@ -34,6 +34,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 # Configure visibility of unit tests to the Agent.
 USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@@ -203,7 +204,7 @@ def process_instance(
     # =============================================
 
     runtime: Runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance=instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/biocoder/run_infer.py b/evaluation/benchmarks/biocoder/run_infer.py
index b0a06a6ece71..f1c98ed06672 100644
--- a/evaluation/benchmarks/biocoder/run_infer.py
+++ b/evaluation/benchmarks/biocoder/run_infer.py
@@ -31,6 +31,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': functools.partial(
@@ -274,6 +275,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/bird/run_infer.py b/evaluation/benchmarks/bird/run_infer.py
index 4cdd29862f38..1c56deb9670c 100644
--- a/evaluation/benchmarks/bird/run_infer.py
+++ b/evaluation/benchmarks/bird/run_infer.py
@@ -34,6 +34,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def codeact_user_response(state: State) -> str:
@@ -399,6 +400,7 @@ def execute_sql(db_path, sql):
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/browsing_delegation/run_infer.py b/evaluation/benchmarks/browsing_delegation/run_infer.py
index 5f3ee99d7437..0ef080dbcaf2 100644
--- a/evaluation/benchmarks/browsing_delegation/run_infer.py
+++ b/evaluation/benchmarks/browsing_delegation/run_infer.py
@@ -25,6 +25,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync
 
 # Only CodeActAgent can delegate to BrowsingAgent
 SUPPORTED_AGENT_CLS = {'CodeActAgent'}
@@ -74,6 +75,7 @@ def process_instance(
     )
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/benchmarks/commit0_bench/run_infer.py b/evaluation/benchmarks/commit0_bench/run_infer.py
index cf6148975b7a..63d394a029d1 100644
--- a/evaluation/benchmarks/commit0_bench/run_infer.py
+++ b/evaluation/benchmarks/commit0_bench/run_infer.py
@@ -35,6 +35,7 @@
 from openhands.events.observation import CmdOutputObservation, ErrorObservation
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 from openhands.utils.shutdown_listener import sleep_if_should_continue
 
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@@ -394,6 +395,7 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {instance.instance_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     try:
         initialize_runtime(runtime, instance)
 
diff --git a/evaluation/benchmarks/discoverybench/run_infer.py b/evaluation/benchmarks/discoverybench/run_infer.py
index f3fdadab8e26..d91d01194d83 100644
--- a/evaluation/benchmarks/discoverybench/run_infer.py
+++ b/evaluation/benchmarks/discoverybench/run_infer.py
@@ -34,6 +34,7 @@
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 EVALUATION_LLM = 'gpt-4-1106-preview'
 
@@ -281,6 +282,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance.data_files)
 
     state: State | None = asyncio.run(
diff --git a/evaluation/benchmarks/gaia/run_infer.py b/evaluation/benchmarks/gaia/run_infer.py
index e0e5ed0363c2..e63026e813e4 100644
--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@@ -31,6 +31,7 @@
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
@@ -148,6 +149,7 @@ def process_instance(
     logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/gorilla/run_infer.py b/evaluation/benchmarks/gorilla/run_infer.py
index 22b42a8545ea..e856fa267c03 100644
--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@@ -26,6 +26,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -82,6 +83,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     state: State | None = asyncio.run(
         run_controller(
             config=config,
diff --git a/evaluation/benchmarks/gpqa/run_infer.py b/evaluation/benchmarks/gpqa/run_infer.py
index 0f19755c3427..e297e3fb9ed5 100644
--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@@ -49,6 +49,7 @@
     MessageAction,
 )
 from openhands.events.observation import Observation
+from openhands.utils.async_utils import call_async_from_sync
 
 ACTION_FORMAT = """
 <<FINAL_ANSWER||
@@ -214,6 +215,7 @@ def process_instance(
 """
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     state: State | None = asyncio.run(
         run_controller(
             config=config,
diff --git a/evaluation/benchmarks/humanevalfix/run_infer.py b/evaluation/benchmarks/humanevalfix/run_infer.py
index 82e89f06e958..fbf88859b6af 100644
--- a/evaluation/benchmarks/humanevalfix/run_infer.py
+++ b/evaluation/benchmarks/humanevalfix/run_infer.py
@@ -39,6 +39,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 IMPORT_HELPER = {
     'python': [
@@ -232,6 +233,7 @@ def process_instance(
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/benchmarks/logic_reasoning/run_infer.py b/evaluation/benchmarks/logic_reasoning/run_infer.py
index 543416deea02..fac82f29f510 100644
--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -31,6 +31,7 @@
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -206,6 +207,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/miniwob/run_infer.py b/evaluation/benchmarks/miniwob/run_infer.py
index a6b354716a02..55e510818a80 100644
--- a/evaluation/benchmarks/miniwob/run_infer.py
+++ b/evaluation/benchmarks/miniwob/run_infer.py
@@ -41,6 +41,7 @@
     BROWSER_EVAL_GET_GOAL_ACTION,
     BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync
 
 SUPPORTED_AGENT_CLS = {'BrowsingAgent', 'CodeActAgent'}
 
@@ -145,6 +146,7 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {env_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     task_str, obs = initialize_runtime(runtime)
 
     task_str += (
diff --git a/evaluation/benchmarks/mint/run_infer.py b/evaluation/benchmarks/mint/run_infer.py
index 320c9caf47ae..bd1a394332c9 100644
--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -35,6 +35,7 @@
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@@ -184,6 +185,7 @@ def process_instance(
     )
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime)
 
     state: State | None = asyncio.run(
diff --git a/evaluation/benchmarks/ml_bench/run_infer.py b/evaluation/benchmarks/ml_bench/run_infer.py
index 371314a74dd2..5eff173b4600 100644
--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@@ -43,6 +43,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 config = load_app_config()
 
@@ -234,6 +235,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Run the agent
diff --git a/evaluation/benchmarks/scienceagentbench/run_infer.py b/evaluation/benchmarks/scienceagentbench/run_infer.py
index b84b9230e5e6..fe0cd7ef3a00 100644
--- a/evaluation/benchmarks/scienceagentbench/run_infer.py
+++ b/evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -29,6 +29,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -195,6 +196,7 @@ def process_instance(
 """
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index 266fc6fa2399..df9042969eb3 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -40,6 +40,7 @@
 from openhands.events.observation import CmdOutputObservation, ErrorObservation
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 from openhands.utils.shutdown_listener import sleep_if_should_continue
 
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@@ -464,6 +465,7 @@ def process_instance(
             f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
         )
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
 
     try:
         initialize_runtime(runtime, instance)
diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py
index fc5dadb27cb5..732a431aa143 100644
--- a/evaluation/benchmarks/the_agent_company/run_infer.py
+++ b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -28,6 +28,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 def get_config(
@@ -275,7 +276,7 @@ def run_evaluator(
         args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
     )
     runtime: Runtime = create_runtime(config)
-
+    call_async_from_sync(runtime.connect)
     init_task_env(runtime, args.server_hostname, env_llm_config)
 
     dependencies = load_dependencies(runtime)
diff --git a/evaluation/benchmarks/toolqa/run_infer.py b/evaluation/benchmarks/toolqa/run_infer.py
index b4b9724dcef5..2fc670e568c6 100644
--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@@ -27,6 +27,7 @@
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
@@ -104,6 +105,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     initialize_runtime(runtime)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/benchmarks/visualwebarena/run_infer.py b/evaluation/benchmarks/visualwebarena/run_infer.py
index 4a1bce5f977c..75529155a26a 100644
--- a/evaluation/benchmarks/visualwebarena/run_infer.py
+++ b/evaluation/benchmarks/visualwebarena/run_infer.py
@@ -37,6 +37,7 @@
     BROWSER_EVAL_GET_GOAL_ACTION,
     BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync
 
 SUPPORTED_AGENT_CLS = {'VisualBrowsingAgent'}
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -159,6 +160,8 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {env_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
+
     task_str, goal_image_urls = initialize_runtime(runtime)
     initial_user_action = MessageAction(content=task_str, image_urls=goal_image_urls)
     state: State | None = asyncio.run(
diff --git a/evaluation/benchmarks/webarena/run_infer.py b/evaluation/benchmarks/webarena/run_infer.py
index 3da1cb4a8d0b..a9b251b90ae5 100644
--- a/evaluation/benchmarks/webarena/run_infer.py
+++ b/evaluation/benchmarks/webarena/run_infer.py
@@ -36,6 +36,7 @@
     BROWSER_EVAL_GET_GOAL_ACTION,
     BROWSER_EVAL_GET_REWARDS_ACTION,
 )
+from openhands.utils.async_utils import call_async_from_sync
 
 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
 
@@ -144,6 +145,7 @@ def process_instance(
         logger.info(f'Starting evaluation for instance {env_id}.')
 
     runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     task_str = initialize_runtime(runtime)
 
     state: State | None = asyncio.run(
diff --git a/evaluation/integration_tests/run_infer.py b/evaluation/integration_tests/run_infer.py
index 61c7e3bde910..d215b0599bf0 100644
--- a/evaluation/integration_tests/run_infer.py
+++ b/evaluation/integration_tests/run_infer.py
@@ -30,6 +30,7 @@
 from openhands.events.action import MessageAction
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 FAKE_RESPONSES = {
     'CodeActAgent': fake_user_response,
@@ -108,6 +109,7 @@ def process_instance(
     # create sandbox and run the agent
     # =============================================
     runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
     try:
         test_class.initialize_runtime(runtime)
 
diff --git a/openhands/core/cli.py b/openhands/core/cli.py
index 0e43cfc2d6a7..bb134803ae0a 100644
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -14,7 +14,12 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
 from openhands.core.schema import AgentState
-from openhands.core.setup import create_agent, create_controller, create_runtime
+from openhands.core.setup import (
+    create_agent,
+    create_controller,
+    create_runtime,
+    initialize_repository_for_runtime,
+)
 from openhands.events import EventSource, EventStreamSubscriber
 from openhands.events.action import (
     Action,
@@ -109,7 +114,6 @@ async def main(loop: asyncio.AbstractEventLoop):
         sid=sid,
         headless_mode=True,
         agent=agent,
-        selected_repository=config.sandbox.selected_repo,
     )
 
     controller, _ = create_controller(agent, runtime, config)
@@ -165,6 +169,14 @@ def on_event(event: Event) -> None:
 
     await runtime.connect()
 
+    # Initialize repository if needed
+    if config.sandbox.selected_repo:
+        initialize_repository_for_runtime(
+            runtime,
+            agent=agent,
+            selected_repository=config.sandbox.selected_repo,
+        )
+
     if initial_user_action:
         # If there's an initial user action, enqueue it and do not prompt again
         event_stream.add_event(initial_user_action, EventSource.USER)
diff --git a/openhands/core/main.py b/openhands/core/main.py
index ab642553b351..4b282864f247 100644
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -20,6 +20,7 @@
     create_controller,
     create_runtime,
     generate_sid,
+    initialize_repository_for_runtime,
 )
 from openhands.events import EventSource, EventStreamSubscriber
 from openhands.events.action import MessageAction, NullAction
@@ -29,6 +30,7 @@
 from openhands.events.serialization import event_from_dict
 from openhands.io import read_input, read_task
 from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
 
 
 class FakeUserResponseFunc(Protocol):
@@ -97,8 +99,17 @@ async def run_controller(
             sid=sid,
             headless_mode=headless_mode,
             agent=agent,
-            selected_repository=config.sandbox.selected_repo,
         )
+        # Connect to the runtime
+        call_async_from_sync(runtime.connect)
+
+        # Initialize repository if needed
+        if config.sandbox.selected_repo:
+            initialize_repository_for_runtime(
+                runtime,
+                agent=agent,
+                selected_repository=config.sandbox.selected_repo,
+            )
 
     event_stream = runtime.event_stream
 
diff --git a/openhands/core/setup.py b/openhands/core/setup.py
index 9142be6713db..709ef923b98d 100644
--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -21,7 +21,6 @@
 from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
 from openhands.storage import get_file_store
-from openhands.utils.async_utils import call_async_from_sync
 
 
 def create_runtime(
@@ -29,18 +28,19 @@ def create_runtime(
     sid: str | None = None,
     headless_mode: bool = True,
     agent: Agent | None = None,
-    selected_repository: str | None = None,
-    github_token: SecretStr | None = None,
 ) -> Runtime:
     """Create a runtime for the agent to run on.
 
-    config: The app config.
-    sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
-        Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
-    headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
-        where we don't want to have the VSCode UI open, so it defaults to True.
-    selected_repository: (optional) The GitHub repository to use.
-    github_token: (optional) The GitHub token to use.
+    Args:
+        config: The app config.
+        sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
+            Setting it to an incompatible value will cause unexpected behavior on RemoteRuntime.
+        headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
+            where we don't want to have the VSCode UI open, so it defaults to True.
+        agent: (optional) The agent instance to use for configuring the runtime.
+
+    Returns:
+        The created Runtime instance (not yet connected or initialized).
     """
     # if sid is provided on the command line, use it as the name of the event stream
     # otherwise generate it on the basis of the configured jwt_secret
@@ -74,8 +74,30 @@ def create_runtime(
         headless_mode=headless_mode,
     )
 
-    call_async_from_sync(runtime.connect)
+    logger.debug(
+        f'Runtime created with plugins: {[plugin.name for plugin in runtime.plugins]}'
+    )
+
+    return runtime
+
 
+def initialize_repository_for_runtime(
+    runtime: Runtime,
+    agent: Agent | None = None,
+    selected_repository: str | None = None,
+    github_token: SecretStr | None = None,
+) -> str | None:
+    """Initialize the repository for the runtime.
+
+    Args:
+        runtime: The runtime to initialize the repository for.
+        agent: (optional) The agent to load microagents for.
+        selected_repository: (optional) The GitHub repository to use.
+        github_token: (optional) The GitHub token to use.
+
+    Returns:
+        The repository directory path if a repository was cloned, None otherwise.
+    """
     # clone selected repository if provided
     repo_directory = None
     github_token = (
@@ -98,11 +120,7 @@ def create_runtime(
         agent.prompt_manager.load_microagents(microagents)
         agent.prompt_manager.set_repository_info(selected_repository, repo_directory)
 
-    logger.debug(
-        f'Runtime initialized with plugins: {[plugin.name for plugin in runtime.plugins]}'
-    )
-
-    return runtime
+    return repo_directory
 
 
 def create_agent(config: AppConfig) -> Agent: