Merge branch 'All-Hands-AI:main' into main

RainRat · Mar 5, 2025 · cb75170 · cb75170
2 parents f38428a + 1ffee80
commit cb75170
Show file tree

Hide file tree

Showing 19 changed files with 176 additions and 34 deletions.
diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml
@@ -41,8 +41,10 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3.4.0
+        uses: docker/setup-qemu-action@v3.6.0
         with:
           image: tonistiigi/binfmt:latest
       - name: Login to GHCR
@@ -90,8 +92,10 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3.4.0
+        uses: docker/setup-qemu-action@v3.6.0
         with:
           image: tonistiigi/binfmt:latest
       - name: Login to GHCR
@@ -154,6 +158,8 @@ jobs:
         base_image: ['nikolaik']
     steps:
       - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
       - name: Cache Poetry dependencies
         uses: actions/cache@v4
         with:

diff --git a/docs/package-lock.json b/docs/package-lock.json
diff --git a/docs/package.json b/docs/package.json
@@ -31,7 +31,7 @@
     "@docusaurus/module-type-aliases": "^3.5.1",
     "@docusaurus/tsconfig": "^3.7.0",
     "@docusaurus/types": "^3.5.1",
-    "typescript": "~5.7.3"
+    "typescript": "~5.8.2"
   },
   "browserslist": {
     "production": [

diff --git a/evaluation/benchmarks/aider_bench/README.md b/evaluation/benchmarks/aider_bench/README.md
@@ -56,9 +56,10 @@ You can update the arguments in the script
 ./evaluation/benchmarks/aider_bench/scripts/run_infer.sh eval_gpt35_turbo HEAD CodeActAgent 100 1 "1,3,10"
 ```
 
-### Run Inference on `RemoteRuntime` (experimental)
+### Run Inference on `RemoteRuntime`
+
+This is in beta. Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
 
-This is in limited beta. Contact Xingyao over slack if you want to try this out!
 
 ```bash
 ./evaluation/benchmarks/aider_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [eval-num-workers] [eval_ids]

diff --git a/evaluation/benchmarks/commit0_bench/README.md b/evaluation/benchmarks/commit0_bench/README.md
@@ -58,9 +58,10 @@ then your command would be:
 ./evaluation/benchmarks/commit0_bench/scripts/run_infer.sh lite llm.eval_sonnet HEAD CodeActAgent 10 30 1 wentingzhao/commit0_combined test
 ```
 
-### Run Inference on `RemoteRuntime` (experimental)
+### Run Inference on `RemoteRuntime`
+
+This is in beta. Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
 
-This is in limited beta. Contact Xingyao over slack if you want to try this out!
 
 ```bash
 ./evaluation/benchmarks/commit0_bench/scripts/run_infer.sh [repo_split] [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]

diff --git a/evaluation/benchmarks/miniwob/README.md b/evaluation/benchmarks/miniwob/README.md
@@ -19,9 +19,10 @@ Access with browser the above MiniWoB URLs and see if they load correctly.
 ./evaluation/benchmarks/miniwob/scripts/run_infer.sh llm.claude-35-sonnet-eval
 ```
 
-### Run Inference on `RemoteRuntime` (experimental)
+### Run Inference on `RemoteRuntime`
+
+This is in beta. Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
 
-This is in limited beta. Contact Xingyao over slack if you want to try this out!
 
 ```bash
 ./evaluation/benchmarks/miniwob/scripts/run_infer.sh [model_config] [git-version] [agent] [note] [eval_limit] [num_workers]

diff --git a/evaluation/benchmarks/swe_bench/README.md b/evaluation/benchmarks/swe_bench/README.md
@@ -65,9 +65,9 @@ then your command would be:
 ./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
 ```
 
-### Run Inference on `RemoteRuntime` (experimental)
+### Run Inference on `RemoteRuntime`
 
-This is in limited beta. Contact Xingyao over slack if you want to try this out!
+This is in beta. Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
 
 ```bash
 ./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]
@@ -163,9 +163,9 @@ The final results will be saved to `evaluation/evaluation_outputs/outputs/swe_be
 - `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
 - `logs/`: a directory of test logs
 
-### Run evaluation with `RemoteRuntime` (experimental)
+### Run evaluation with `RemoteRuntime`
 
-This is in limited beta. Contact Xingyao over slack if you want to try this out!
+This is in beta. Fill out [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply if you want to try this out!
 
 ```bash
 ./evaluation/benchmarks/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]

diff --git a/frontend/src/components/features/settings/settings-dropdown-input.tsx b/frontend/src/components/features/settings/settings-dropdown-input.tsx
@@ -1,9 +1,10 @@
 import { Autocomplete, AutocompleteItem } from "@heroui/react";
+import { ReactNode } from "react";
 import { OptionalTag } from "./optional-tag";
 
 interface SettingsDropdownInputProps {
   testId: string;
-  label: string;
+  label: ReactNode;
   name: string;
   items: { key: React.Key; label: string }[];
   showOptionalTag?: boolean;
@@ -29,7 +30,7 @@ export function SettingsDropdownInput({
         {showOptionalTag && <OptionalTag />}
       </div>
       <Autocomplete
-        aria-label={label}
+        aria-label={typeof label === "string" ? label : name}
         data-testid={testId}
         name={name}
         defaultItems={items}

diff --git a/frontend/src/components/shared/modals/settings/settings-form.tsx b/frontend/src/components/shared/modals/settings/settings-form.tsx
@@ -43,15 +43,18 @@ export function SettingsForm({ settings, models, onClose }: SettingsFormProps) {
   const handleFormSubmission = async (formData: FormData) => {
     const newSettings = extractSettings(formData);
 
-    await saveUserSettings(newSettings);
-    onClose();
-    resetOngoingSession();
-
-    posthog.capture("settings_saved", {
-      LLM_MODEL: newSettings.LLM_MODEL,
-      LLM_API_KEY: newSettings.LLM_API_KEY ? "SET" : "UNSET",
-      REMOTE_RUNTIME_RESOURCE_FACTOR:
-        newSettings.REMOTE_RUNTIME_RESOURCE_FACTOR,
+    await saveUserSettings(newSettings, {
+      onSuccess: () => {
+        onClose();
+        resetOngoingSession();
+
+        posthog.capture("settings_saved", {
+          LLM_MODEL: newSettings.LLM_MODEL,
+          LLM_API_KEY: newSettings.LLM_API_KEY ? "SET" : "UNSET",
+          REMOTE_RUNTIME_RESOURCE_FACTOR:
+            newSettings.REMOTE_RUNTIME_RESOURCE_FACTOR,
+        });
+      },
     });
   };
 

diff --git a/frontend/src/hooks/query/use-settings.ts b/frontend/src/hooks/query/use-settings.ts
@@ -3,7 +3,6 @@ import React from "react";
 import posthog from "posthog-js";
 import OpenHands from "#/api/open-hands";
 import { useAuth } from "#/context/auth-context";
-import { useConfig } from "#/hooks/query/use-config";
 import { DEFAULT_SETTINGS } from "#/services/settings";
 
 const getSettingsQueryFn = async () => {
@@ -27,12 +26,10 @@ const getSettingsQueryFn = async () => {
 
 export const useSettings = () => {
   const { setGitHubTokenIsSet, githubTokenIsSet } = useAuth();
-  const { data: config } = useConfig();
 
   const query = useQuery({
     queryKey: ["settings", githubTokenIsSet],
     queryFn: getSettingsQueryFn,
-    enabled: config?.APP_MODE !== "saas" || githubTokenIsSet,
     // Only retry if the error is not a 404 because we
     // would want to show the modal immediately if the
     // settings are not found

diff --git a/frontend/src/routes/account-settings.tsx b/frontend/src/routes/account-settings.tsx
@@ -278,7 +278,15 @@ function AccountSettings() {
               <SettingsDropdownInput
                 testId="runtime-settings-input"
                 name="runtime-settings-input"
-                label="Runtime Settings"
+                label={
+                  <>
+                    Runtime Settings (
+                    <a href="mailto:contact@all-hands.dev">
+                      get in touch for access
+                    </a>
+                    )
+                  </>
+                }
                 items={REMOTE_RUNTIME_OPTIONS}
                 defaultSelectedKey={settings.REMOTE_RUNTIME_RESOURCE_FACTOR?.toString()}
                 isDisabled

diff --git a/openhands/agenthub/codeact_agent/prompts/additional_info.j2 b/openhands/agenthub/codeact_agent/prompts/additional_info.j2
@@ -8,8 +8,9 @@ At the user's request, repository {{ repository_info.repo_name }} has been clone
 {{ repository_instructions }}
 </REPOSITORY_INSTRUCTIONS>
 {% endif %}
-{% if runtime_info and runtime_info.available_hosts -%}
+{% if runtime_info and (runtime_info.available_hosts or runtime_info.additional_agent_instructions) -%}
 <RUNTIME_INFORMATION>
+{% if runtime_info.available_hosts %}
 The user has access to the following hosts for accessing a web application,
 each of which has a corresponding port:
 {% for host, port in runtime_info.available_hosts.items() -%}
@@ -18,5 +19,7 @@ each of which has a corresponding port:
 When starting a web server, use the corresponding ports. You should also
 set any options to allow iframes and CORS requests, and allow the server to
 be accessed from any host (e.g. 0.0.0.0).
+{% endif %}
+{{ runtime_info.additional_agent_instructions }}
 </RUNTIME_INFORMATION>
 {% endif %}
diff --git a/openhands/core/config/condenser_config.py b/openhands/core/config/condenser_config.py
@@ -27,6 +27,17 @@ class ObservationMaskingCondenserConfig(BaseModel):
     model_config = {'extra': 'forbid'}
 
 
+class BrowserOutputCondenserConfig(BaseModel):
+    """Configuration for the BrowserOutputCondenser."""
+
+    type: Literal['browser_output_masking'] = Field('browser_output_masking')
+    attention_window: int = Field(
+        default=1,
+        description='The number of most recent browser output observations that will not be masked.',
+        ge=1,
+    )
+
+
 class RecentEventsCondenserConfig(BaseModel):
     """Configuration for RecentEventsCondenser."""
 
@@ -115,6 +126,7 @@ class LLMAttentionCondenserConfig(BaseModel):
 CondenserConfig = (
     NoOpCondenserConfig
     | ObservationMaskingCondenserConfig
+    | BrowserOutputCondenserConfig
     | RecentEventsCondenserConfig
     | LLMSummarizingCondenserConfig
     | AmortizedForgettingCondenserConfig

diff --git a/openhands/memory/condenser/impl/__init__.py b/openhands/memory/condenser/impl/__init__.py
@@ -1,6 +1,9 @@
 from openhands.memory.condenser.impl.amortized_forgetting_condenser import (
     AmortizedForgettingCondenser,
 )
+from openhands.memory.condenser.impl.browser_output_condenser import (
+    BrowserOutputCondenser,
+)
 from openhands.memory.condenser.impl.llm_attention_condenser import (
     ImportantEventSelection,
     LLMAttentionCondenser,
@@ -23,5 +26,6 @@
     'LLMSummarizingCondenser',
     'NoOpCondenser',
     'ObservationMaskingCondenser',
+    'BrowserOutputCondenser',
     'RecentEventsCondenser',
 ]
diff --git a/openhands/memory/condenser/impl/browser_output_condenser.py b/openhands/memory/condenser/impl/browser_output_condenser.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
+from openhands.events.event import Event
+from openhands.events.observation import BrowserOutputObservation
+from openhands.events.observation.agent import AgentCondensationObservation
+from openhands.memory.condenser.condenser import Condenser
+
+
+class BrowserOutputCondenser(Condenser):
+    """Mask browser output observations outside of a recent attention window.
+
+    Only browser outputs are masked; all other events are left untouched.
+    Browser observations currently provide screenshots and accessibility trees
+    to the model. These are very large and consume a lot of tokens without any
+    benefit in performance, so observations from earlier timesteps are masked
+    and only the most recent ones stay in context.
+    """
+
+    def __init__(self, attention_window: int = 1):
+        """Initialize the condenser.
+
+        Args:
+            attention_window: Number of most recent browser output
+                observations that are left unmasked.
+        """
+        self.attention_window = attention_window
+        super().__init__()
+
+    def condense(self, events: list[Event]) -> list[Event]:
+        """Replace the content of browser observations outside of the attention window with a placeholder.
+
+        Args:
+            events: The events to condense (presumably oldest first; browser
+                observations nearest the end are treated as the most recent).
+
+        Returns:
+            A new list with the same length and order, where each masked
+            browser observation is replaced by an AgentCondensationObservation
+            that keeps only its URL.
+        """
+        results: list[Event] = []
+        cnt: int = 0
+        # Iterate newest-to-oldest so the first `attention_window` browser
+        # observations seen are the ones kept intact.
+        for event in reversed(events):
+            if (
+                isinstance(event, BrowserOutputObservation)
+                and cnt >= self.attention_window
+            ):
+                results.append(
+                    AgentCondensationObservation(
+                        f'Current URL: {event.url}\nContent Omitted'
+                    )
+                )
+            else:
+                results.append(event)
+                if isinstance(event, BrowserOutputObservation):
+                    cnt += 1
+
+        # Undo the reversal to restore the original event order.
+        return list(reversed(results))
+
+    @classmethod
+    def from_config(
+        cls, config: BrowserOutputCondenserConfig
+    ) -> BrowserOutputCondenser:
+        """Build a condenser from its config, omitting the `type` field that `__init__` does not accept."""
+        # Use `cls` so subclasses get their own type, and a set for `exclude`
+        # as pydantic's `model_dump` signature documents.
+        return cls(**config.model_dump(exclude={'type'}))
+
+
+BrowserOutputCondenser.register_config(BrowserOutputCondenserConfig)
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
@@ -487,3 +487,7 @@ def vscode_url(self) -> str | None:
     @property
     def web_hosts(self) -> dict[str, int]:
         return {}
+
+    @property
+    def additional_agent_instructions(self) -> str:
+        return ''
diff --git a/openhands/runtime/impl/daytona/daytona_runtime.py b/openhands/runtime/impl/daytona/daytona_runtime.py
@@ -260,3 +260,7 @@ def vscode_url(self) -> str | None:
         )
 
         return self._vscode_url
+
+    @property
+    def additional_agent_instructions(self) -> str:
+        return f'When showing endpoints to access applications for any port, e.g. port 3000, instead of localhost:3000, use this format: {self._construct_api_url(3000)}.'
diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py
@@ -19,6 +19,7 @@
 @dataclass
 class RuntimeInfo:
     available_hosts: dict[str, int]
+    additional_agent_instructions: str
 
 
 @dataclass
@@ -56,7 +57,9 @@ def __init__(
         self.user_template: Template = self._load_template('user_prompt')
         self.additional_info_template: Template = self._load_template('additional_info')
         self.microagent_info_template: Template = self._load_template('microagent_info')
-        self.runtime_info = RuntimeInfo(available_hosts={})
+        self.runtime_info = RuntimeInfo(
+            available_hosts={}, additional_agent_instructions=''
+        )
 
         self.knowledge_microagents: dict[str, KnowledgeMicroAgent] = {}
         self.repo_microagents: dict[str, RepoMicroAgent] = {}
@@ -113,6 +116,9 @@ def get_system_message(self) -> str:
 
     def set_runtime_info(self, runtime: Runtime) -> None:
         self.runtime_info.available_hosts = runtime.web_hosts
+        self.runtime_info.additional_agent_instructions = (
+            runtime.additional_agent_instructions
+        )
 
     def set_repository_info(
         self,