From 246107c618833fcf62e44f2ea67aa08c222906cb Mon Sep 17 00:00:00 2001
From: Graham Neubig <neubig@gmail.com>
Date: Wed, 11 Dec 2024 01:05:29 -0800
Subject: [PATCH 01/10] Parallize Python Unit tests (#5499)

---
 .github/workflows/py-unit-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/py-unit-tests.yml b/.github/workflows/py-unit-tests.yml
index 6e624904a82f..28a14095d7c1 100644
--- a/.github/workflows/py-unit-tests.yml
+++ b/.github/workflows/py-unit-tests.yml
@@ -42,7 +42,7 @@ jobs:
       - name: Build Environment
         run: make build
       - name: Run Tests
-        run: poetry run pytest --forked --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
+        run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         env:

From a60ee09881b082f5a119ded900abd23f48655e1f Mon Sep 17 00:00:00 2001
From: Robert Brennan <accounts@rbren.io>
Date: Wed, 11 Dec 2024 09:39:09 -0500
Subject: [PATCH 02/10] Add docker layer caching to ghcr build (#5517)

---
 .github/workflows/ghcr-build.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml
index 7948291d6ec3..09cfffaaf9ff 100644
--- a/.github/workflows/ghcr-build.yml
+++ b/.github/workflows/ghcr-build.yml
@@ -68,6 +68,9 @@ jobs:
       - name: Set up Docker Buildx
         id: buildx
         uses: docker/setup-buildx-action@v3
+      - name: "Set up docker layer caching"
+        uses: satackey/action-docker-layer-caching@v0.0.11
+        continue-on-error: true
       - name: Build and push app image
         if: "!github.event.pull_request.head.repo.fork"
         run: |

From a6d1a4c98f63098e91dcc4dcfee329ac08759477 Mon Sep 17 00:00:00 2001
From: tofarr <tofarr@gmail.com>
Date: Wed, 11 Dec 2024 07:39:57 -0700
Subject: [PATCH 03/10] Fix: Redis listener attached at startup (#5516)

---
 openhands/server/app.py             | 12 ++++++++++--
 openhands/server/session/manager.py | 10 +++++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/openhands/server/app.py b/openhands/server/app.py
index b16879597824..6207ccf2eb0b 100644
--- a/openhands/server/app.py
+++ b/openhands/server/app.py
@@ -1,4 +1,5 @@
 import warnings
+from contextlib import asynccontextmanager
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
@@ -21,10 +22,17 @@
 from openhands.server.routes.files import app as files_api_router
 from openhands.server.routes.public import app as public_api_router
 from openhands.server.routes.security import app as security_api_router
-from openhands.server.shared import config
+from openhands.server.shared import config, session_manager
 from openhands.utils.import_utils import get_impl
 
-app = FastAPI()
+
+@asynccontextmanager
+async def _lifespan(app: FastAPI):
+    async with session_manager:
+        yield
+
+
+app = FastAPI(lifespan=_lifespan)
 app.add_middleware(
     LocalhostCORSMiddleware,
     allow_credentials=True,
diff --git a/openhands/server/session/manager.py b/openhands/server/session/manager.py
index 10b34b5dd259..fbcf4eaa836a 100644
--- a/openhands/server/session/manager.py
+++ b/openhands/server/session/manager.py
@@ -52,6 +52,7 @@ async def _redis_subscribe(self):
         """
         We use a redis backchannel to send actions between server nodes
         """
+        logger.debug('_redis_subscribe')
         redis_client = self._get_redis_client()
         pubsub = redis_client.pubsub()
         await pubsub.subscribe('oh_event')
@@ -75,7 +76,7 @@ async def _redis_subscribe(self):
 
     async def _process_message(self, message: dict):
         data = json.loads(message['data'])
-        logger.info(f'got_published_message:{message}')
+        logger.debug(f'got_published_message:{message}')
         sid = data['sid']
         message_type = data['message_type']
         if message_type == 'event':
@@ -112,7 +113,7 @@ async def _process_message(self, message: dict):
         elif message_type == 'session_closing':
             # Session closing event - We only get this in the event of graceful shutdown,
             # which can't be guaranteed - nodes can simply vanish unexpectedly!
-            logger.info(f'session_closing:{sid}')
+            logger.debug(f'session_closing:{sid}')
             for (
                 connection_id,
                 local_sid,
@@ -142,7 +143,9 @@ async def attach_to_conversation(self, sid: str) -> Conversation | None:
     async def detach_from_conversation(self, conversation: Conversation):
         await conversation.disconnect()
 
-    async def init_or_join_session(self, sid: str, connection_id: str, session_init_data: SessionInitData):
+    async def init_or_join_session(
+        self, sid: str, connection_id: str, session_init_data: SessionInitData
+    ):
         await self.sio.enter_room(connection_id, ROOM_KEY.format(sid=sid))
         self.local_connection_id_to_session_id[connection_id] = sid
 
@@ -165,6 +168,7 @@ async def _is_session_running_in_cluster(self, sid: str) -> bool:
         flag = asyncio.Event()
         self._session_is_running_flags[sid] = flag
         try:
+            logger.debug(f'publish:is_session_running:{sid}')
             await self._get_redis_client().publish(
                 'oh_event',
                 json.dumps(

From 907c65cc001b12184264192339f04c0841ea5c1d Mon Sep 17 00:00:00 2001
From: Xingyao Wang <xingyao@all-hands.dev>
Date: Wed, 11 Dec 2024 16:51:05 -0500
Subject: [PATCH 04/10] chore: add back accidentally removed repo info (#5532)

---
 frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts b/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts
index 6c465e0d7088..789af7dadfec 100644
--- a/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts
+++ b/frontend/src/routes/_oh.app/hooks/use-ws-status-change.ts
@@ -52,6 +52,7 @@ export const useWSStatusChange = () => {
 
     if (gitHubToken && selectedRepository) {
       dispatch(clearSelectedRepository());
+      additionalInfo = `Repository ${selectedRepository} has been cloned to /workspace. Please check the /workspace for files.`;
     } else if (importedProjectZip) {
       // if there's an uploaded project zip, add it to the chat
       additionalInfo =

From 6a6ce5f3ee090bf186045e965c6b5b1d88e27e70 Mon Sep 17 00:00:00 2001
From: OpenHands <opendevin@all-hands.dev>
Date: Wed, 11 Dec 2024 18:20:29 -0500
Subject: [PATCH 05/10] Fix issue #5478: Add color to the line next to "Ran a
 XXX Command" based on return value (#5483)

Co-authored-by: Graham Neubig <neubig@gmail.com>
---
 .../chat/expandable-message.test.tsx          |  60 ++++
 .../features/chat/expandable-message.tsx      |  35 ++-
 .../src/components/features/chat/messages.tsx |   1 +
 frontend/src/icons/check-circle-solid.svg     |   4 +
 frontend/src/icons/x-circle-solid.svg         |   4 +
 frontend/src/message.d.ts                     |   1 +
 frontend/src/state/chat-slice.ts              |  27 +-
 frontend/src/types/core/observations.ts       |  17 ++
 frontend/tailwind.config.js                   |   1 +
 frontend/test-utils.tsx                       |  25 +-
 frontend/vitest.setup.ts                      |   8 +-
 openhands/agenthub/codeact_agent/README.md    |   2 +-
 openhands/events/observation/commands.py      |   8 +
 openhands/events/serialization/event.py       |   3 +
 openhands/events/serialization/observation.py |   1 +
 pyproject.toml                                |   2 +
 tests/unit/test_command_success.py            |  27 ++
 tests/unit/test_event_serialization.py        |  18 ++
 tests/unit/test_observation_serialization.py  |  51 ++--
 tests/unit/test_security.py                   | 289 ++++++++++++------
 20 files changed, 447 insertions(+), 137 deletions(-)
 create mode 100644 frontend/__tests__/components/chat/expandable-message.test.tsx
 create mode 100644 frontend/src/icons/check-circle-solid.svg
 create mode 100644 frontend/src/icons/x-circle-solid.svg
 create mode 100644 tests/unit/test_command_success.py
 create mode 100644 tests/unit/test_event_serialization.py

diff --git a/frontend/__tests__/components/chat/expandable-message.test.tsx b/frontend/__tests__/components/chat/expandable-message.test.tsx
new file mode 100644
index 000000000000..8eab988339de
--- /dev/null
+++ b/frontend/__tests__/components/chat/expandable-message.test.tsx
@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vitest";
+import { screen } from "@testing-library/react";
+import { renderWithProviders } from "test-utils";
+import { ExpandableMessage } from "#/components/features/chat/expandable-message";
+
+describe("ExpandableMessage", () => {
+  it("should render with neutral border for non-action messages", () => {
+    renderWithProviders(<ExpandableMessage message="Hello" type="thought" />);
+    const element = screen.getByText("Hello");
+    const container = element.closest("div.flex.gap-2.items-center.justify-between");
+    expect(container).toHaveClass("border-neutral-300");
+    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
+  });
+
+  it("should render with neutral border for error messages", () => {
+    renderWithProviders(<ExpandableMessage message="Error occurred" type="error" />);
+    const element = screen.getByText("Error occurred");
+    const container = element.closest("div.flex.gap-2.items-center.justify-between");
+    expect(container).toHaveClass("border-neutral-300");
+    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
+  });
+
+  it("should render with success icon for successful action messages", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        message="Command executed successfully"
+        type="action"
+        success={true}
+      />
+    );
+    const element = screen.getByText("Command executed successfully");
+    const container = element.closest("div.flex.gap-2.items-center.justify-between");
+    expect(container).toHaveClass("border-neutral-300");
+    const icon = screen.getByTestId("status-icon");
+    expect(icon).toHaveClass("fill-success");
+  });
+
+  it("should render with error icon for failed action messages", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        message="Command failed"
+        type="action"
+        success={false}
+      />
+    );
+    const element = screen.getByText("Command failed");
+    const container = element.closest("div.flex.gap-2.items-center.justify-between");
+    expect(container).toHaveClass("border-neutral-300");
+    const icon = screen.getByTestId("status-icon");
+    expect(icon).toHaveClass("fill-danger");
+  });
+
+  it("should render with neutral border and no icon for action messages without success prop", () => {
+    renderWithProviders(<ExpandableMessage message="Running command" type="action" />);
+    const element = screen.getByText("Running command");
+    const container = element.closest("div.flex.gap-2.items-center.justify-between");
+    expect(container).toHaveClass("border-neutral-300");
+    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
+  });
+});
diff --git a/frontend/src/components/features/chat/expandable-message.tsx b/frontend/src/components/features/chat/expandable-message.tsx
index f42b3f0b13ad..6ebcaa3aeed5 100644
--- a/frontend/src/components/features/chat/expandable-message.tsx
+++ b/frontend/src/components/features/chat/expandable-message.tsx
@@ -6,17 +6,21 @@ import { code } from "../markdown/code";
 import { ol, ul } from "../markdown/list";
 import ArrowUp from "#/icons/angle-up-solid.svg?react";
 import ArrowDown from "#/icons/angle-down-solid.svg?react";
+import CheckCircle from "#/icons/check-circle-solid.svg?react";
+import XCircle from "#/icons/x-circle-solid.svg?react";
 
 interface ExpandableMessageProps {
   id?: string;
   message: string;
   type: string;
+  success?: boolean;
 }
 
 export function ExpandableMessage({
   id,
   message,
   type,
+  success,
 }: ExpandableMessageProps) {
   const { t, i18n } = useTranslation();
   const [showDetails, setShowDetails] = useState(true);
@@ -31,22 +35,14 @@ export function ExpandableMessage({
     }
   }, [id, message, i18n.language]);
 
-  const border = type === "error" ? "border-danger" : "border-neutral-300";
-  const textColor = type === "error" ? "text-danger" : "text-neutral-300";
-  let arrowClasses = "h-4 w-4 ml-2 inline";
-  if (type === "error") {
-    arrowClasses += " fill-danger";
-  } else {
-    arrowClasses += " fill-neutral-300";
-  }
+  const arrowClasses = "h-4 w-4 ml-2 inline fill-neutral-300";
+  const statusIconClasses = "h-4 w-4 ml-2 inline";
 
   return (
-    <div
-      className={`flex gap-2 items-center justify-start border-l-2 pl-2 my-2 py-2 ${border}`}
-    >
+    <div className="flex gap-2 items-center justify-between border-l-2 border-neutral-300 pl-2 my-2 py-2">
       <div className="text-sm leading-4 flex flex-col gap-2 max-w-full">
         {headline && (
-          <p className={`${textColor} font-bold`}>
+          <p className="text-neutral-300 font-bold">
             {headline}
             <button
               type="button"
@@ -75,6 +71,21 @@ export function ExpandableMessage({
           </Markdown>
         )}
       </div>
+      {type === "action" && success !== undefined && (
+        <div className="flex-shrink-0">
+          {success ? (
+            <CheckCircle
+              data-testid="status-icon"
+              className={`${statusIconClasses} fill-success`}
+            />
+          ) : (
+            <XCircle
+              data-testid="status-icon"
+              className={`${statusIconClasses} fill-danger`}
+            />
+          )}
+        </div>
+      )}
     </div>
   );
 }
diff --git a/frontend/src/components/features/chat/messages.tsx b/frontend/src/components/features/chat/messages.tsx
index 8b0d703b7553..e1bd34637472 100644
--- a/frontend/src/components/features/chat/messages.tsx
+++ b/frontend/src/components/features/chat/messages.tsx
@@ -20,6 +20,7 @@ export function Messages({
           type={message.type}
           id={message.translationID}
           message={message.content}
+          success={message.success}
         />
       );
     }
diff --git a/frontend/src/icons/check-circle-solid.svg b/frontend/src/icons/check-circle-solid.svg
new file mode 100644
index 000000000000..a07362b4ab3f
--- /dev/null
+++ b/frontend/src/icons/check-circle-solid.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+  <path d="M256 512A256 256 0 1 0 256 0a256 256 0 1 0 0 512zM369 209L241 337c-9.4 9.4-24.6 9.4-33.9 0l-64-64c-9.4-9.4-9.4-24.6 0-33.9s24.6-9.4 33.9 0l47 47L335 175c9.4-9.4 24.6-9.4 33.9 0s9.4 24.6 0 33.9z"/>
+</svg>
diff --git a/frontend/src/icons/x-circle-solid.svg b/frontend/src/icons/x-circle-solid.svg
new file mode 100644
index 000000000000..f673bbf0b1e5
--- /dev/null
+++ b/frontend/src/icons/x-circle-solid.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+  <path d="M256 512A256 256 0 1 0 256 0a256 256 0 1 0 0 512zM175 175c9.4-9.4 24.6-9.4 33.9 0l47 47 47-47c9.4-9.4 24.6-9.4 33.9 0s9.4 24.6 0 33.9l-47 47 47 47c9.4 9.4 9.4 24.6 0 33.9s-24.6 9.4-33.9 0l-47-47-47 47c-9.4 9.4-24.6 9.4-33.9 0s-9.4-24.6 0-33.9l47-47-47-47c-9.4-9.4-9.4-24.6 0-33.9z"/>
+</svg>
diff --git a/frontend/src/message.d.ts b/frontend/src/message.d.ts
index 5b70e39c8f56..65bd7e0cb193 100644
--- a/frontend/src/message.d.ts
+++ b/frontend/src/message.d.ts
@@ -4,6 +4,7 @@ type Message = {
   timestamp: string;
   imageUrls?: string[];
   type?: "thought" | "error" | "action";
+  success?: boolean;
   pending?: boolean;
   translationID?: string;
   eventID?: number;
diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts
index df24236c2c5a..47d2b651754d 100644
--- a/frontend/src/state/chat-slice.ts
+++ b/frontend/src/state/chat-slice.ts
@@ -1,14 +1,25 @@
 import { createSlice, PayloadAction } from "@reduxjs/toolkit";
 
 import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
-import { OpenHandsObservation } from "#/types/core/observations";
+import {
+  OpenHandsObservation,
+  CommandObservation,
+  IPythonObservation,
+} from "#/types/core/observations";
 import { OpenHandsAction } from "#/types/core/actions";
+import { OpenHandsEventType } from "#/types/core/base";
 
 type SliceState = { messages: Message[] };
 
 const MAX_CONTENT_LENGTH = 1000;
 
-const HANDLED_ACTIONS = ["run", "run_ipython", "write", "read", "browse"];
+const HANDLED_ACTIONS: OpenHandsEventType[] = [
+  "run",
+  "run_ipython",
+  "write",
+  "read",
+  "browse",
+];
 
 function getRiskText(risk: ActionSecurityRisk) {
   switch (risk) {
@@ -131,6 +142,18 @@ export const chatSlice = createSlice({
         return;
       }
       causeMessage.translationID = translationID;
+      // Set success property based on observation type
+      if (observationID === "run") {
+        const commandObs = observation.payload as CommandObservation;
+        causeMessage.success = commandObs.extras.exit_code === 0;
+      } else if (observationID === "run_ipython") {
+        // For IPython, we consider it successful if there's no error message
+        const ipythonObs = observation.payload as IPythonObservation;
+        causeMessage.success = !ipythonObs.message
+          .toLowerCase()
+          .includes("error");
+      }
+
       if (observationID === "run" || observationID === "run_ipython") {
         let { content } = observation.payload;
         if (content.length > MAX_CONTENT_LENGTH) {
diff --git a/frontend/src/types/core/observations.ts b/frontend/src/types/core/observations.ts
index 0b95099a8384..7ddc3f05dd94 100644
--- a/frontend/src/types/core/observations.ts
+++ b/frontend/src/types/core/observations.ts
@@ -52,6 +52,21 @@ export interface BrowseObservation extends OpenHandsObservationEvent<"browse"> {
   };
 }
 
+export interface WriteObservation extends OpenHandsObservationEvent<"write"> {
+  source: "agent";
+  extras: {
+    path: string;
+    content: string;
+  };
+}
+
+export interface ReadObservation extends OpenHandsObservationEvent<"read"> {
+  source: "agent";
+  extras: {
+    path: string;
+  };
+}
+
 export interface ErrorObservation extends OpenHandsObservationEvent<"error"> {
   source: "user";
   extras: {
@@ -65,4 +80,6 @@ export type OpenHandsObservation =
   | IPythonObservation
   | DelegateObservation
   | BrowseObservation
+  | WriteObservation
+  | ReadObservation
   | ErrorObservation;
diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js
index 1e665d23fdbb..1a57ebcd8bce 100644
--- a/frontend/tailwind.config.js
+++ b/frontend/tailwind.config.js
@@ -14,6 +14,7 @@ export default {
         'root-secondary': '#262626',
         'hyperlink': '#007AFF',
         'danger': '#EF3744',
+        'success': '#4CAF50',
       },
     },
   },
diff --git a/frontend/test-utils.tsx b/frontend/test-utils.tsx
index 4b336602fbf6..6739e3be6e15 100644
--- a/frontend/test-utils.tsx
+++ b/frontend/test-utils.tsx
@@ -6,10 +6,31 @@ import { configureStore } from "@reduxjs/toolkit";
 // eslint-disable-next-line import/no-extraneous-dependencies
 import { RenderOptions, render } from "@testing-library/react";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { I18nextProvider } from "react-i18next";
+import i18n from "i18next";
+import { initReactI18next } from "react-i18next";
 import { AppStore, RootState, rootReducer } from "./src/store";
 import { AuthProvider } from "#/context/auth-context";
 import { UserPrefsProvider } from "#/context/user-prefs-context";
 
+// Initialize i18n for tests
+i18n
+  .use(initReactI18next)
+  .init({
+    lng: "en",
+    fallbackLng: "en",
+    ns: ["translation"],
+    defaultNS: "translation",
+    resources: {
+      en: {
+        translation: {},
+      },
+    },
+    interpolation: {
+      escapeValue: false,
+    },
+  });
+
 const setupStore = (preloadedState?: Partial<RootState>): AppStore =>
   configureStore({
     reducer: rootReducer,
@@ -40,7 +61,9 @@ export function renderWithProviders(
         <UserPrefsProvider>
           <AuthProvider>
             <QueryClientProvider client={new QueryClient()}>
-              {children}
+              <I18nextProvider i18n={i18n}>
+                {children}
+              </I18nextProvider>
             </QueryClientProvider>
           </AuthProvider>
         </UserPrefsProvider>
diff --git a/frontend/vitest.setup.ts b/frontend/vitest.setup.ts
index 105337e75eba..e9a89c8677f6 100644
--- a/frontend/vitest.setup.ts
+++ b/frontend/vitest.setup.ts
@@ -12,7 +12,13 @@ HTMLElement.prototype.scrollTo = vi.fn();
 // Mock the i18n provider
 vi.mock("react-i18next", async (importOriginal) => ({
   ...(await importOriginal<typeof import("react-i18next")>()),
-  useTranslation: () => ({ t: (key: string) => key }),
+  useTranslation: () => ({
+    t: (key: string) => key,
+    i18n: {
+      language: "en",
+      exists: () => false,
+    },
+  }),
 }));
 
 // Mock requests during tests
diff --git a/openhands/agenthub/codeact_agent/README.md b/openhands/agenthub/codeact_agent/README.md
index 9a5093820e85..0e15939cdfb8 100644
--- a/openhands/agenthub/codeact_agent/README.md
+++ b/openhands/agenthub/codeact_agent/README.md
@@ -110,4 +110,4 @@ The agent is implemented in two main files:
 2. `function_calling.py`: Tool definitions and function calling interface with:
    - Tool parameter specifications
    - Tool descriptions and examples
-   - Function calling response parsing
\ No newline at end of file
+   - Function calling response parsing
diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py
index a182168e694a..b522b5c47283 100644
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@@ -23,6 +23,10 @@ def error(self) -> bool:
     def message(self) -> str:
         return f'Command `{self.command}` executed with exit code {self.exit_code}.'
 
+    @property
+    def success(self) -> bool:
+        return not self.error
+
     def __str__(self) -> str:
         return f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code})**\n{self.content}'
 
@@ -42,5 +46,9 @@ def error(self) -> bool:
     def message(self) -> str:
         return 'Code executed in IPython cell.'
 
+    @property
+    def success(self) -> bool:
+        return True  # IPython cells are always considered successful
+
     def __str__(self) -> str:
         return f'**IPythonRunCellObservation**\n{self.content}'
diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py
index 78f7940626d4..6ee82a1cc81c 100644
--- a/openhands/events/serialization/event.py
+++ b/openhands/events/serialization/event.py
@@ -83,6 +83,9 @@ def event_to_dict(event: 'Event') -> dict:
     elif 'observation' in d:
         d['content'] = props.pop('content', '')
         d['extras'] = props
+        # Include success field for CmdOutputObservation
+        if hasattr(event, 'success'):
+            d['success'] = event.success
     else:
         raise ValueError('Event must be either action or observation')
     return d
diff --git a/openhands/events/serialization/observation.py b/openhands/events/serialization/observation.py
index 9030ccb1e1dd..d9d8dc51adaf 100644
--- a/openhands/events/serialization/observation.py
+++ b/openhands/events/serialization/observation.py
@@ -50,4 +50,5 @@ def observation_from_dict(observation: dict) -> Observation:
     observation.pop('message', None)
     content = observation.pop('content', '')
     extras = observation.pop('extras', {})
+
     return observation_class(content=content, **extras)
diff --git a/pyproject.toml b/pyproject.toml
index acae091bb6e9..aa81db8e5587 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -98,6 +98,7 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]
 
+
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -128,6 +129,7 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
+
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"
diff --git a/tests/unit/test_command_success.py b/tests/unit/test_command_success.py
new file mode 100644
index 000000000000..b52ceb4815c7
--- /dev/null
+++ b/tests/unit/test_command_success.py
@@ -0,0 +1,27 @@
+from openhands.events.observation.commands import (
+    CmdOutputObservation,
+    IPythonRunCellObservation,
+)
+
+
+def test_cmd_output_success():
+    # Test successful command
+    obs = CmdOutputObservation(
+        command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
+    )
+    assert obs.success is True
+    assert obs.error is False
+
+    # Test failed command
+    obs = CmdOutputObservation(
+        command_id=2, command='ls', content='No such file or directory', exit_code=1
+    )
+    assert obs.success is False
+    assert obs.error is True
+
+
+def test_ipython_cell_success():
+    # IPython cells are always successful
+    obs = IPythonRunCellObservation(code='print("Hello")', content='Hello')
+    assert obs.success is True
+    assert obs.error is False
diff --git a/tests/unit/test_event_serialization.py b/tests/unit/test_event_serialization.py
new file mode 100644
index 000000000000..d1989a30bb09
--- /dev/null
+++ b/tests/unit/test_event_serialization.py
@@ -0,0 +1,18 @@
+from openhands.events.observation import CmdOutputObservation
+from openhands.events.serialization import event_to_dict
+
+
+def test_command_output_success_serialization():
+    # Test successful command
+    obs = CmdOutputObservation(
+        command_id=1, command='ls', content='file1.txt\nfile2.txt', exit_code=0
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is True
+
+    # Test failed command
+    obs = CmdOutputObservation(
+        command_id=2, command='ls', content='No such file or directory', exit_code=1
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is False
diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py
index ae636ddf562b..67a95449b719 100644
--- a/tests/unit/test_observation_serialization.py
+++ b/tests/unit/test_observation_serialization.py
@@ -40,36 +40,23 @@ def serialization_deserialization(
 
 
 # Additional tests for various observation subclasses can be included here
-def test_observation_event_props_serialization_deserialization():
-    original_observation_dict = {
-        'id': 42,
-        'source': 'agent',
-        'timestamp': '2021-08-01T12:00:00',
-        'observation': 'run',
-        'message': 'Command `ls -l` executed with exit code 0.',
-        'extras': {
-            'exit_code': 0,
-            'command': 'ls -l',
-            'command_id': 3,
-            'hidden': False,
-            'interpreter_details': '',
-        },
-        'content': 'foo.txt',
-    }
-    serialization_deserialization(original_observation_dict, CmdOutputObservation)
-
+def test_success_field_serialization():
+    # Test success=True
+    obs = CmdOutputObservation(
+        content='Command succeeded',
+        exit_code=0,
+        command='ls -l',
+        command_id=3,
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is True
 
-def test_command_output_observation_serialization_deserialization():
-    original_observation_dict = {
-        'observation': 'run',
-        'extras': {
-            'exit_code': 0,
-            'command': 'ls -l',
-            'command_id': 3,
-            'hidden': False,
-            'interpreter_details': '',
-        },
-        'message': 'Command `ls -l` executed with exit code 0.',
-        'content': 'foo.txt',
-    }
-    serialization_deserialization(original_observation_dict, CmdOutputObservation)
+    # Test success=False
+    obs = CmdOutputObservation(
+        content='No such file or directory',
+        exit_code=1,
+        command='ls -l',
+        command_id=3,
+    )
+    serialized = event_to_dict(obs)
+    assert serialized['success'] is False
diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py
index fab27a3ec2f3..a36c66104f65 100644
--- a/tests/unit/test_security.py
+++ b/tests/unit/test_security.py
@@ -51,24 +51,48 @@ def add_events(event_stream: EventStream, data: list[tuple[Event, EventSource]])
 
 
 def test_msg(temp_dir: str):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    policy = """
-    raise "Disallow ABC [risk=medium]" if:
-        (msg: Message)
-        "ABC" in msg.content
-    """
-    InvariantAnalyzer(event_stream, policy)
-    data = [
-        (MessageAction('Hello world!'), EventSource.USER),
-        (MessageAction('AB!'), EventSource.AGENT),
-        (MessageAction('Hello world!'), EventSource.USER),
-        (MessageAction('ABC!'), EventSource.AGENT),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],  # First check
+        [],  # Second check
+        [],  # Third check
+        [
+            'PolicyViolation(Disallow ABC [risk=medium], ranges=[<2 ranges>])'
+        ],  # Fourth check
     ]
-    add_events(event_stream, data)
-    for i in range(3):
-        assert data[i][0].security_risk == ActionSecurityRisk.LOW
-    assert data[3][0].security_risk == ActionSecurityRisk.MEDIUM
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        policy = """
+        raise "Disallow ABC [risk=medium]" if:
+            (msg: Message)
+            "ABC" in msg.content
+        """
+        InvariantAnalyzer(event_stream, policy)
+        data = [
+            (MessageAction('Hello world!'), EventSource.USER),
+            (MessageAction('AB!'), EventSource.AGENT),
+            (MessageAction('Hello world!'), EventSource.USER),
+            (MessageAction('ABC!'), EventSource.AGENT),
+        ]
+        add_events(event_stream, data)
+        for i in range(3):
+            assert data[i][0].security_risk == ActionSecurityRisk.LOW
+        assert data[3][0].security_risk == ActionSecurityRisk.MEDIUM
 
 
 @pytest.mark.parametrize(
@@ -76,22 +100,44 @@ def test_msg(temp_dir: str):
     [('rm -rf root_dir', ActionSecurityRisk.MEDIUM), ['ls', ActionSecurityRisk.LOW]],
 )
 def test_cmd(cmd, expected_risk, temp_dir: str):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    policy = """
-    raise "Disallow rm -rf [risk=medium]" if:
-        (call: ToolCall)
-        call is tool:run
-        match("rm -rf", call.function.arguments.command)
-    """
-    InvariantAnalyzer(event_stream, policy)
-    data = [
-        (MessageAction('Hello world!'), EventSource.USER),
-        (CmdRunAction(cmd), EventSource.USER),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],  # First check
+        ['PolicyViolation(Disallow rm -rf [risk=medium], ranges=[<2 ranges>])']
+        if expected_risk == ActionSecurityRisk.MEDIUM
+        else [],  # Second check
     ]
-    add_events(event_stream, data)
-    assert data[0][0].security_risk == ActionSecurityRisk.LOW
-    assert data[1][0].security_risk == expected_risk
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        policy = """
+        raise "Disallow rm -rf [risk=medium]" if:
+            (call: ToolCall)
+            call is tool:run
+            match("rm -rf", call.function.arguments.command)
+        """
+        InvariantAnalyzer(event_stream, policy)
+        data = [
+            (MessageAction('Hello world!'), EventSource.USER),
+            (CmdRunAction(cmd), EventSource.USER),
+        ]
+        add_events(event_stream, data)
+        assert data[0][0].security_risk == ActionSecurityRisk.LOW
+        assert data[1][0].security_risk == expected_risk
 
 
 @pytest.mark.parametrize(
@@ -102,26 +148,49 @@ def test_cmd(cmd, expected_risk, temp_dir: str):
     ],
 )
 def test_leak_secrets(code, expected_risk, temp_dir: str):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    policy = """
-    from invariant.detectors import secrets
-
-    raise "Disallow writing secrets [risk=medium]" if:
-        (call: ToolCall)
-        call is tool:run_ipython
-        any(secrets(call.function.arguments.code))
-    """
-    InvariantAnalyzer(event_stream, policy)
-    data = [
-        (MessageAction('Hello world!'), EventSource.USER),
-        (IPythonRunCellAction(code), EventSource.AGENT),
-        (IPythonRunCellAction('hello'), EventSource.AGENT),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],  # First check
+        ['PolicyViolation(Disallow writing secrets [risk=medium], ranges=[<2 ranges>])']
+        if expected_risk == ActionSecurityRisk.MEDIUM
+        else [],  # Second check
+        [],  # Third check
     ]
-    add_events(event_stream, data)
-    assert data[0][0].security_risk == ActionSecurityRisk.LOW
-    assert data[1][0].security_risk == expected_risk
-    assert data[2][0].security_risk == ActionSecurityRisk.LOW
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        policy = """
+        from invariant.detectors import secrets
+
+        raise "Disallow writing secrets [risk=medium]" if:
+            (call: ToolCall)
+            call is tool:run_ipython
+            any(secrets(call.function.arguments.code))
+        """
+        InvariantAnalyzer(event_stream, policy)
+        data = [
+            (MessageAction('Hello world!'), EventSource.USER),
+            (IPythonRunCellAction(code), EventSource.AGENT),
+            (IPythonRunCellAction('hello'), EventSource.AGENT),
+        ]
+        add_events(event_stream, data)
+        assert data[0][0].security_risk == ActionSecurityRisk.LOW
+        assert data[1][0].security_risk == expected_risk
+        assert data[2][0].security_risk == ActionSecurityRisk.LOW
 
 
 def test_unsafe_python_code(temp_dir: str):
@@ -458,26 +527,48 @@ def default_config():
 def test_check_usertask(
     mock_litellm_completion, usertask, is_appropriate, default_config, temp_dir: str
 ):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    analyzer = InvariantAnalyzer(event_stream)
-    mock_response = {'choices': [{'message': {'content': is_appropriate}}]}
-    mock_litellm_completion.return_value = mock_response
-    analyzer.guardrail_llm = LLM(config=default_config)
-    analyzer.check_browsing_alignment = True
-    data = [
-        (MessageAction(usertask), EventSource.USER),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],
+        [
+            'PolicyViolation(Vulnerability in python code [risk=medium], ranges=[<2 ranges>])'
+        ],
     ]
-    add_events(event_stream, data)
-    event_list = list(event_stream.get_events())
 
-    if is_appropriate == 'No':
-        assert len(event_list) == 2
-        assert type(event_list[0]) == MessageAction
-        assert type(event_list[1]) == ChangeAgentStateAction
-    elif is_appropriate == 'Yes':
-        assert len(event_list) == 1
-        assert type(event_list[0]) == MessageAction
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        analyzer = InvariantAnalyzer(event_stream)
+        mock_response = {'choices': [{'message': {'content': is_appropriate}}]}
+        mock_litellm_completion.return_value = mock_response
+        analyzer.guardrail_llm = LLM(config=default_config)
+        analyzer.check_browsing_alignment = True
+        data = [
+            (MessageAction(usertask), EventSource.USER),
+        ]
+        add_events(event_stream, data)
+        event_list = list(event_stream.get_events())
+
+        if is_appropriate == 'No':
+            assert len(event_list) == 2
+            assert type(event_list[0]) == MessageAction
+            assert type(event_list[1]) == ChangeAgentStateAction
+        elif is_appropriate == 'Yes':
+            assert len(event_list) == 1
+            assert type(event_list[0]) == MessageAction
 
 
 @pytest.mark.parametrize(
@@ -491,23 +582,45 @@ def test_check_usertask(
 def test_check_fillaction(
     mock_litellm_completion, fillaction, is_harmful, default_config, temp_dir: str
 ):
-    file_store = get_file_store('local', temp_dir)
-    event_stream = EventStream('main', file_store)
-    analyzer = InvariantAnalyzer(event_stream)
-    mock_response = {'choices': [{'message': {'content': is_harmful}}]}
-    mock_litellm_completion.return_value = mock_response
-    analyzer.guardrail_llm = LLM(config=default_config)
-    analyzer.check_browsing_alignment = True
-    data = [
-        (BrowseInteractiveAction(browser_actions=fillaction), EventSource.AGENT),
+    mock_container = MagicMock()
+    mock_container.status = 'running'
+    mock_container.attrs = {
+        'NetworkSettings': {'Ports': {'8000/tcp': [{'HostPort': 34567}]}}
+    }
+    mock_docker = MagicMock()
+    mock_docker.from_env().containers.list.return_value = [mock_container]
+
+    mock_requests = MagicMock()
+    mock_requests.get().json.return_value = {'id': 'mock-session-id'}
+    mock_requests.post().json.side_effect = [
+        {'monitor_id': 'mock-monitor-id'},
+        [],
+        [
+            'PolicyViolation(Vulnerability in python code [risk=medium], ranges=[<2 ranges>])'
+        ],
     ]
-    add_events(event_stream, data)
-    event_list = list(event_stream.get_events())
-
-    if is_harmful == 'Yes':
-        assert len(event_list) == 2
-        assert type(event_list[0]) == BrowseInteractiveAction
-        assert type(event_list[1]) == ChangeAgentStateAction
-    elif is_harmful == 'No':
-        assert len(event_list) == 1
-        assert type(event_list[0]) == BrowseInteractiveAction
+
+    with (
+        patch(f'{InvariantAnalyzer.__module__}.docker', mock_docker),
+        patch(f'{InvariantClient.__module__}.requests', mock_requests),
+    ):
+        file_store = get_file_store('local', temp_dir)
+        event_stream = EventStream('main', file_store)
+        analyzer = InvariantAnalyzer(event_stream)
+        mock_response = {'choices': [{'message': {'content': is_harmful}}]}
+        mock_litellm_completion.return_value = mock_response
+        analyzer.guardrail_llm = LLM(config=default_config)
+        analyzer.check_browsing_alignment = True
+        data = [
+            (BrowseInteractiveAction(browser_actions=fillaction), EventSource.AGENT),
+        ]
+        add_events(event_stream, data)
+        event_list = list(event_stream.get_events())
+
+        if is_harmful == 'Yes':
+            assert len(event_list) == 2
+            assert type(event_list[0]) == BrowseInteractiveAction
+            assert type(event_list[1]) == ChangeAgentStateAction
+        elif is_harmful == 'No':
+            assert len(event_list) == 1
+            assert type(event_list[0]) == BrowseInteractiveAction

From 2f2ea9ec9177a6e53433d885b21169bd7900b756 Mon Sep 17 00:00:00 2001
From: mamoodi <mamoodiha@gmail.com>
Date: Wed, 11 Dec 2024 21:03:06 -0500
Subject: [PATCH 06/10] Update the doc for headless to include no continue
 (#5537)

---
 docs/modules/usage/how-to/headless-mode.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/modules/usage/how-to/headless-mode.md b/docs/modules/usage/how-to/headless-mode.md
index dfd4dd5e3e14..ff5e622de6a4 100644
--- a/docs/modules/usage/how-to/headless-mode.md
+++ b/docs/modules/usage/how-to/headless-mode.md
@@ -55,5 +55,5 @@ docker run -it \
     --add-host host.docker.internal:host-gateway \
     --name openhands-app-$(date +%Y%m%d%H%M%S) \
     docker.all-hands.dev/all-hands-ai/openhands:0.15 \
-    python -m openhands.core.main -t "write a bash script that prints hi"
+    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```

From ffd472d6b8804db1527d3f23a84df4d9ab108146 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Thu, 12 Dec 2024 03:12:50 +0100
Subject: [PATCH 07/10] Update litellm (#5520)

---
 poetry.lock    | 33 +++++++++++++++++----------------
 pyproject.toml |  2 +-
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 11cb9c8862ad..a0459d9fa5ec 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -553,17 +553,17 @@ files = [
 
 [[package]]
 name = "boto3"
-version = "1.35.68"
+version = "1.35.78"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "boto3-1.35.68-py3-none-any.whl", hash = "sha256:9b26fa31901da7793c1dcd65eee9bab7e897d8aa1ffed0b5e1c3bce93d2aefe4"},
-    {file = "boto3-1.35.68.tar.gz", hash = "sha256:091d6bed1422370987a839bff3f8755df7404fc15e9fac2a48e8505356f07433"},
+    {file = "boto3-1.35.78-py3-none-any.whl", hash = "sha256:5ef7166fe5060637b92af8dc152cd7acecf96b3fc9c5456706a886cadb534391"},
+    {file = "boto3-1.35.78.tar.gz", hash = "sha256:fc8001519c8842e766ad3793bde3fbd0bb39e821a582fc12cf67876b8f3cf7f1"},
 ]
 
 [package.dependencies]
-botocore = ">=1.35.68,<1.36.0"
+botocore = ">=1.35.78,<1.36.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.10.0,<0.11.0"
 
@@ -572,13 +572,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
 
 [[package]]
 name = "botocore"
-version = "1.35.68"
+version = "1.35.78"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "botocore-1.35.68-py3-none-any.whl", hash = "sha256:599139d5564291f5be873800711f9e4e14a823395ae9ce7b142be775e9849b94"},
-    {file = "botocore-1.35.68.tar.gz", hash = "sha256:42c3700583a82f2b5316281a073d644a521d6358837e2b446dc458ba5d990fb4"},
+    {file = "botocore-1.35.78-py3-none-any.whl", hash = "sha256:41c37bd7c0326f25122f33ec84fb80fc0a14d7fcc9961431b0e57568e88c9cb5"},
+    {file = "botocore-1.35.78.tar.gz", hash = "sha256:6905036c25449ae8dba5e950e4b908e4b8a6fe6b516bf61e007ecb62fa21f323"},
 ]
 
 [package.dependencies]
@@ -3739,22 +3739,23 @@ types-tqdm = "*"
 
 [[package]]
 name = "litellm"
-version = "1.52.15"
+version = "1.54.1"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 files = [
-    {file = "litellm-1.52.15-py3-none-any.whl", hash = "sha256:8a2d8e2526c5e7afb3006b0214d3c348778462fefafd582fd76bb7f5c35d28d0"},
-    {file = "litellm-1.52.15.tar.gz", hash = "sha256:11a61b1b033ddff9d480da66c00acc9d3e4fbfeed166d1b0de8eda16c684116e"},
+    {file = "litellm-1.54.1-py3-none-any.whl", hash = "sha256:d8e60d4a5e8decb0234a1e8c20351c904aec561fb4025df7df3d0d7ea81ca442"},
+    {file = "litellm-1.54.1.tar.gz", hash = "sha256:b5a8fc99160fab0699b9258457432b3975499218ffcf1b515709808b2ce5a2d7"},
 ]
 
 [package.dependencies]
 aiohttp = "*"
 click = "*"
+httpx = ">=0.23.0,<0.28.0"
 importlib-metadata = ">=6.8.0"
 jinja2 = ">=3.1.2,<4.0.0"
 jsonschema = ">=4.22.0,<5.0.0"
-openai = ">=1.54.0"
+openai = ">=1.55.3"
 pydantic = ">=2.0.0,<3.0.0"
 python-dotenv = ">=0.2.0"
 requests = ">=2.31.0,<3.0.0"
@@ -5413,13 +5414,13 @@ sympy = "*"
 
 [[package]]
 name = "openai"
-version = "1.55.0"
+version = "1.57.2"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "openai-1.55.0-py3-none-any.whl", hash = "sha256:446e08918f8dd70d8723274be860404c8c7cc46b91b93bbc0ef051f57eb503c1"},
-    {file = "openai-1.55.0.tar.gz", hash = "sha256:6c0975ac8540fe639d12b4ff5a8e0bf1424c844c4a4251148f59f06c4b2bd5db"},
+    {file = "openai-1.57.2-py3-none-any.whl", hash = "sha256:f7326283c156fdee875746e7e54d36959fb198eadc683952ee05e3302fbd638d"},
+    {file = "openai-1.57.2.tar.gz", hash = "sha256:5f49fd0f38e9f2131cda7deb45dafdd1aee4f52a637e190ce0ecf40147ce8cee"},
 ]
 
 [package.dependencies]
@@ -10060,4 +10061,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "ff3daee70a197e3f6ff460bd1e14be7ed443a100805947ee18df7afb7d898584"
+content-hash = "039581f859df4446dc9491bf39913a54f53c5d71e9bad86ff71ddd1d1682f9af"
diff --git a/pyproject.toml b/pyproject.toml
index aa81db8e5587..4da046981212 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ packages = [
 python = "^3.12"
 datasets = "*"
 pandas = "*"
-litellm = "^1.52.3"
+litellm = "^1.54.1"
 google-generativeai = "*" # To use litellm with Gemini Pro API
 google-api-python-client = "*" # For Google Sheets API
 google-auth-httplib2 = "*" # For Google Sheets authentication

From 7e4c1c733b84ecfc35f7f5601219be54939321f7 Mon Sep 17 00:00:00 2001
From: Cheng Yang <93481273+young010101@users.noreply.github.com>
Date: Thu, 12 Dec 2024 10:21:46 +0800
Subject: [PATCH 08/10] feat(sandbox): add support for extra Docker build
 arguments (#5447)

---
 config.template.toml                                |  3 +++
 openhands/core/config/sandbox_config.py             |  1 +
 openhands/runtime/builder/base.py                   |  2 ++
 openhands/runtime/builder/docker.py                 |  2 +-
 openhands/runtime/builder/remote.py                 |  8 +++++++-
 .../runtime/impl/eventstream/eventstream_runtime.py |  1 +
 openhands/runtime/utils/runtime_build.py            | 13 ++++++++++++-
 tests/unit/test_runtime_build.py                    |  3 +++
 8 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/config.template.toml b/config.template.toml
index 9d84c5a7ff34..6f626e6bee90 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -217,6 +217,9 @@ llm_config = 'gpt3'
 # Use host network
 #use_host_network = false
 
+# runtime extra build args
+#runtime_extra_build_args = ["--network=host", "--add-host=host.docker.internal:host-gateway"]
+
 # Enable auto linting after editing
 #enable_auto_lint = false
 
diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py
index fee44cbaffd6..c6367067557b 100644
--- a/openhands/core/config/sandbox_config.py
+++ b/openhands/core/config/sandbox_config.py
@@ -48,6 +48,7 @@ class SandboxConfig:
         False  # once enabled, OpenHands would lint files after editing
     )
     use_host_network: bool = False
+    runtime_extra_build_args: list[str] | None = None
     initialize_plugins: bool = True
     force_rebuild_runtime: bool = False
     runtime_extra_deps: str | None = None
diff --git a/openhands/runtime/builder/base.py b/openhands/runtime/builder/base.py
index df2ee99035c9..acfe3c60fb89 100644
--- a/openhands/runtime/builder/base.py
+++ b/openhands/runtime/builder/base.py
@@ -8,6 +8,7 @@ def build(
         path: str,
         tags: list[str],
         platform: str | None = None,
+        extra_build_args: list[str] | None = None,
     ) -> str:
         """Build the runtime image.
 
@@ -15,6 +16,7 @@ def build(
             path (str): The path to the runtime image's build directory.
             tags (list[str]): The tags to apply to the runtime image (e.g., ["repo:my-repo", "sha:my-sha"]).
             platform (str, optional): The target platform for the build. Defaults to None.
+            extra_build_args (list[str], optional): Additional build arguments to pass to the builder. Defaults to None.
 
         Returns:
             str: The name:tag of the runtime image after build (e.g., "repo:sha").
diff --git a/openhands/runtime/builder/docker.py b/openhands/runtime/builder/docker.py
index 9cdf0f998fcd..880b1c73c578 100644
--- a/openhands/runtime/builder/docker.py
+++ b/openhands/runtime/builder/docker.py
@@ -28,8 +28,8 @@ def build(
         path: str,
         tags: list[str],
         platform: str | None = None,
-        use_local_cache: bool = False,
         extra_build_args: list[str] | None = None,
+        use_local_cache: bool = False,
     ) -> str:
         """Builds a Docker image using BuildKit and handles the build logs appropriately.
 
diff --git a/openhands/runtime/builder/remote.py b/openhands/runtime/builder/remote.py
index c9d3228a70af..5cfe1a4943a4 100644
--- a/openhands/runtime/builder/remote.py
+++ b/openhands/runtime/builder/remote.py
@@ -23,7 +23,13 @@ def __init__(self, api_url: str, api_key: str):
         self.session = requests.Session()
         self.session.headers.update({'X-API-Key': self.api_key})
 
-    def build(self, path: str, tags: list[str], platform: str | None = None) -> str:
+    def build(
+        self,
+        path: str,
+        tags: list[str],
+        platform: str | None = None,
+        extra_build_args: list[str] | None = None,
+    ) -> str:
         """Builds a Docker image using the Runtime API's /build endpoint."""
         # Create a tar archive of the build context
         tar_buffer = io.BytesIO()
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 8cedcbe54603..2dc99aa2aa36 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -229,6 +229,7 @@ async def connect(self):
                     platform=self.config.sandbox.platform,
                     extra_deps=self.config.sandbox.runtime_extra_deps,
                     force_rebuild=self.config.sandbox.force_rebuild_runtime,
+                    extra_build_args=self.config.sandbox.runtime_extra_build_args,
                 )
 
             self.log(
diff --git a/openhands/runtime/utils/runtime_build.py b/openhands/runtime/utils/runtime_build.py
index eab98befe538..de939efd9a38 100644
--- a/openhands/runtime/utils/runtime_build.py
+++ b/openhands/runtime/utils/runtime_build.py
@@ -111,6 +111,7 @@ def build_runtime_image(
     build_folder: str | None = None,
     dry_run: bool = False,
     force_rebuild: bool = False,
+    extra_build_args: List[str] | None = None,
 ) -> str:
     """Prepares the final docker build folder.
     If dry_run is False, it will also build the OpenHands runtime Docker image using the docker build folder.
@@ -123,6 +124,7 @@ def build_runtime_image(
     - build_folder (str): The directory to use for the build. If not provided a temporary directory will be used
     - dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image
     - force_rebuild (bool): if True, it will create the Dockerfile which uses the base_image
+    - extra_build_args (List[str]): Additional build arguments to pass to the builder
 
     Returns:
     - str: <image_repo>:<MD5 hash>. Where MD5 hash is the hash of the docker build folder
@@ -139,6 +141,7 @@ def build_runtime_image(
                 dry_run=dry_run,
                 force_rebuild=force_rebuild,
                 platform=platform,
+                extra_build_args=extra_build_args,
             )
             return result
 
@@ -150,6 +153,7 @@ def build_runtime_image(
         dry_run=dry_run,
         force_rebuild=force_rebuild,
         platform=platform,
+        extra_build_args=extra_build_args,
     )
     return result
 
@@ -162,6 +166,7 @@ def build_runtime_image_in_folder(
     dry_run: bool,
     force_rebuild: bool,
     platform: str | None = None,
+    extra_build_args: List[str] | None = None,
 ) -> str:
     runtime_image_repo, _ = get_runtime_image_repo_and_tag(base_image)
     lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image)}'
@@ -193,6 +198,7 @@ def build_runtime_image_in_folder(
                 lock_tag,
                 versioned_tag,
                 platform,
+                extra_build_args=extra_build_args,
             )
         return hash_image_name
 
@@ -234,6 +240,7 @@ def build_runtime_image_in_folder(
             if build_from == BuildFromImageType.SCRATCH
             else None,
             platform=platform,
+            extra_build_args=extra_build_args,
         )
 
     return hash_image_name
@@ -339,6 +346,7 @@ def _build_sandbox_image(
     lock_tag: str,
     versioned_tag: str | None,
     platform: str | None = None,
+    extra_build_args: List[str] | None = None,
 ):
     """Build and tag the sandbox image. The image will be tagged with all tags that do not yet exist"""
     names = [
@@ -350,7 +358,10 @@ def _build_sandbox_image(
     names = [name for name in names if not runtime_builder.image_exists(name, False)]
 
     image_name = runtime_builder.build(
-        path=str(build_folder), tags=names, platform=platform
+        path=str(build_folder),
+        tags=names,
+        platform=platform,
+        extra_build_args=extra_build_args,
     )
     if not image_name:
         raise RuntimeError(f'Build failed for image {names}')
diff --git a/tests/unit/test_runtime_build.py b/tests/unit/test_runtime_build.py
index 79a7c9a22b6b..b5cbd91056e8 100644
--- a/tests/unit/test_runtime_build.py
+++ b/tests/unit/test_runtime_build.py
@@ -239,6 +239,7 @@ def test_build_runtime_image_from_scratch():
                 f'{get_runtime_image_repo()}:{OH_VERSION}_mock-versioned-tag',
             ],
             platform=None,
+            extra_build_args=None,
         )
         assert (
             image_name
@@ -333,6 +334,7 @@ def image_exists_side_effect(image_name, *args):
                 # VERSION tag will NOT be included except from scratch
             ],
             platform=None,
+            extra_build_args=None,
         )
         mock_prep_build_folder.assert_called_once_with(
             ANY,
@@ -391,6 +393,7 @@ def image_exists_side_effect(image_name, *args):
                 # VERSION tag will NOT be included except from scratch
             ],
             platform=None,
+            extra_build_args=None,
         )
         mock_prep_build_folder.assert_called_once_with(
             ANY,

From 425ccb0fbb0af13c4262b50ccd8d6baa9680591f Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Thu, 12 Dec 2024 03:48:06 +0100
Subject: [PATCH 09/10] Clean up empty content fix (revert #4935) (#5539)

---
 openhands/core/message.py          | 8 --------
 openhands/llm/debug_mixin.py       | 2 +-
 openhands/llm/fn_call_converter.py | 2 +-
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/openhands/core/message.py b/openhands/core/message.py
index a5b67917eaee..5fcd5a2b317b 100644
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -105,14 +105,6 @@ def _list_serializer(self) -> dict:
 
         message_dict: dict = {'content': content, 'role': self.role}
 
-        # pop content if it's empty
-        if not content or (
-            len(content) == 1
-            and content[0]['type'] == 'text'
-            and content[0]['text'] == ''
-        ):
-            message_dict.pop('content')
-
         if role_tool_with_prompt_caching:
             message_dict['cache_control'] = {'type': 'ephemeral'}
 
diff --git a/openhands/llm/debug_mixin.py b/openhands/llm/debug_mixin.py
index f386613d6fba..6a247471ee78 100644
--- a/openhands/llm/debug_mixin.py
+++ b/openhands/llm/debug_mixin.py
@@ -16,7 +16,7 @@ def log_prompt(self, messages: list[dict[str, Any]] | dict[str, Any]):
         debug_message = MESSAGE_SEPARATOR.join(
             self._format_message_content(msg)
             for msg in messages
-            if msg.get('content', None)
+            if msg['content'] is not None
         )
 
         if debug_message:
diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py
index 491ef906eaa1..ae4d87f8d62b 100644
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -321,7 +321,7 @@ def convert_fncall_messages_to_non_fncall_messages(
     first_user_message_encountered = False
     for message in messages:
         role = message['role']
-        content = message.get('content', '')
+        content = message['content']
 
         # 1. SYSTEM MESSAGES
         # append system prompt suffix to content

From e979f51ea5bd53951a0d10dbe1d5725fc1d37dc6 Mon Sep 17 00:00:00 2001
From: Graham Neubig <neubig@gmail.com>
Date: Wed, 11 Dec 2024 22:18:38 -0800
Subject: [PATCH 10/10] Fix chat input not clearing after image paste/drop
 (#5342)

Co-authored-by: openhands <openhands@all-hands.dev>
---
 .../components/interactive-chat-box.test.tsx  | 58 ++++++++++++++++++-
 .../components/features/chat/chat-input.tsx   | 10 +++-
 .../features/chat/interactive-chat-box.tsx    |  3 +
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/frontend/__tests__/components/interactive-chat-box.test.tsx b/frontend/__tests__/components/interactive-chat-box.test.tsx
index fa0d3a1b8e30..fe6ba329763b 100644
--- a/frontend/__tests__/components/interactive-chat-box.test.tsx
+++ b/frontend/__tests__/components/interactive-chat-box.test.tsx
@@ -1,4 +1,4 @@
-import { render, screen, within } from "@testing-library/react";
+import { render, screen, within, fireEvent } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
 import { InteractiveChatBox } from "#/components/features/chat/interactive-chat-box";
@@ -131,4 +131,60 @@ describe("InteractiveChatBox", () => {
     await user.click(stopButton);
     expect(onStopMock).toHaveBeenCalledOnce();
   });
+
+  it("should handle image upload and message submission correctly", async () => {
+    const user = userEvent.setup();
+    const onSubmit = vi.fn();
+    const onStop = vi.fn();
+    const onChange = vi.fn();
+
+    const { rerender } = render(
+      <InteractiveChatBox
+        onSubmit={onSubmit}
+        onStop={onStop}
+        onChange={onChange}
+        value="test message"
+      />
+    );
+
+    // Upload an image via the upload button - this should NOT clear the text input
+    const file = new File(["dummy content"], "test.png", { type: "image/png" });
+    const input = screen.getByTestId("upload-image-input");
+    await user.upload(input, file);
+
+    // Verify text input was not cleared
+    expect(screen.getByRole("textbox")).toHaveValue("test message");
+    expect(onChange).not.toHaveBeenCalledWith("");
+
+    // Submit the message with image
+    const submitButton = screen.getByRole("button", { name: "Send" });
+    await user.click(submitButton);
+
+    // Verify onSubmit was called with the message and image
+    expect(onSubmit).toHaveBeenCalledWith("test message", [file]);
+
+    // Verify onChange was called to clear the text input
+    expect(onChange).toHaveBeenCalledWith("");
+
+    // Simulate parent component updating the value prop
+    rerender(
+      <InteractiveChatBox
+        onSubmit={onSubmit}
+        onStop={onStop}
+        onChange={onChange}
+        value=""
+      />
+    );
+
+    // Verify the text input was cleared
+    expect(screen.getByRole("textbox")).toHaveValue("");
+
+    // Upload another image - this should NOT clear the text input
+    onChange.mockClear();
+    await user.upload(input, file);
+
+    // Verify text input is still empty and onChange was not called
+    expect(screen.getByRole("textbox")).toHaveValue("");
+    expect(onChange).not.toHaveBeenCalled();
+  });
 });
diff --git a/frontend/src/components/features/chat/chat-input.tsx b/frontend/src/components/features/chat/chat-input.tsx
index 815eb1933720..44a86d675c22 100644
--- a/frontend/src/components/features/chat/chat-input.tsx
+++ b/frontend/src/components/features/chat/chat-input.tsx
@@ -83,9 +83,13 @@ export function ChatInput({
   };
 
   const handleSubmitMessage = () => {
-    if (textareaRef.current?.value) {
-      onSubmit(textareaRef.current.value);
-      textareaRef.current.value = "";
+    if (value || (textareaRef.current?.value && !value)) {
+      onSubmit(value || textareaRef.current?.value || "");
+      if (value) {
+        onChange?.("");
+      } else if (textareaRef.current) {
+        textareaRef.current.value = "";
+      }
     }
   };
 
diff --git a/frontend/src/components/features/chat/interactive-chat-box.tsx b/frontend/src/components/features/chat/interactive-chat-box.tsx
index e96339adf0a7..09dcf84b32d6 100644
--- a/frontend/src/components/features/chat/interactive-chat-box.tsx
+++ b/frontend/src/components/features/chat/interactive-chat-box.tsx
@@ -38,6 +38,9 @@ export function InteractiveChatBox({
   const handleSubmit = (message: string) => {
     onSubmit(message, images);
     setImages([]);
+    if (message) {
+      onChange?.("");
+    }
   };
 
   return (