From 1dd8c3fdb100e13068d83dc5de9fc65529bca464 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:15:09 -0400
Subject: [PATCH 01/60] prevent evenstream from looping back on itself

---
 openhands/events/stream.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/openhands/events/stream.py b/openhands/events/stream.py
index 1e4c3b9d539..cda1578295a 100644
--- a/openhands/events/stream.py
+++ b/openhands/events/stream.py
@@ -138,6 +138,10 @@ def add_event(self, event: Event, source: EventSource):
             asyncio.run(self.async_add_event(event, source))
 
     async def async_add_event(self, event: Event, source: EventSource):
+        if event._id is not None:  # type: ignore [attr-defined]
+            raise ValueError(
+                'Event already has an ID. It was probably added back to the EventStream from inside a handler, trigging a loop.'
+            )
         with self._lock:
             event._id = self._cur_id  # type: ignore [attr-defined]
             self._cur_id += 1

From ba3c558ff7cd2dfb51967857c1c61253d603734e Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:17:42 -0400
Subject: [PATCH 02/60] use hasattr

---
 openhands/events/stream.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/events/stream.py b/openhands/events/stream.py
index cda1578295a..b2368da2030 100644
--- a/openhands/events/stream.py
+++ b/openhands/events/stream.py
@@ -138,7 +138,7 @@ def add_event(self, event: Event, source: EventSource):
             asyncio.run(self.async_add_event(event, source))
 
     async def async_add_event(self, event: Event, source: EventSource):
-        if event._id is not None:  # type: ignore [attr-defined]
+        if hasattr(event, '_id') and event.id is not None:
             raise ValueError(
                 'Event already has an ID. It was probably added back to the EventStream from inside a handler, trigging a loop.'
             )

From 1b72a8966b406983c0ca9d45daf0355a364fcd62 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:30:56 -0400
Subject: [PATCH 03/60] refactor a bit

---
 evaluation/EDA/run_infer.py                   |  2 +-
 evaluation/agent_bench/run_infer.py           |  2 +-
 evaluation/aider_bench/run_infer.py           |  2 +-
 evaluation/biocoder/run_infer.py              |  2 +-
 evaluation/bird/run_infer.py                  |  2 +-
 evaluation/browsing_delegation/run_infer.py   |  2 +-
 evaluation/gaia/run_infer.py                  |  2 +-
 evaluation/gorilla/run_infer.py               |  2 +-
 evaluation/gpqa/run_infer.py                  |  2 +-
 evaluation/humanevalfix/run_infer.py          |  2 +-
 evaluation/integration_tests/run_infer.py     |  2 +-
 evaluation/logic_reasoning/run_infer.py       |  2 +-
 evaluation/miniwob/run_infer.py               |  2 +-
 evaluation/mint/run_infer.py                  |  2 +-
 evaluation/swe_bench/run_infer.py             |  8 ++--
 .../swe_bench/scripts/eval/compare_outputs.py |  0
 .../scripts/eval/convert_oh_output_to_md.py   |  0
 .../scripts/eval/summarize_outputs.py         |  0
 .../scripts/setup/compare_patch_filename.py   |  0
 evaluation/toolqa/run_infer.py                |  2 +-
 evaluation/webarena/run_infer.py              |  2 +-
 openhands/controller/agent_controller.py      | 38 ++++++++-----------
 openhands/controller/state/state.py           | 13 +++++--
 23 files changed, 44 insertions(+), 47 deletions(-)
 mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/compare_outputs.py
 mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
 mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/summarize_outputs.py
 mode change 100755 => 100644 evaluation/swe_bench/scripts/setup/compare_patch_filename.py

diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py
index 2c896939a75..c4f6c118411 100644
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -158,7 +158,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'success': test_result,
             'final_message': final_message,
diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py
index d6fcc62e079..e05c66e9135 100644
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -283,7 +283,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'agent_answer': agent_answer,
             'final_answer': final_ans,
diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py
index fa1bb9534a8..9b7f33d404c 100644
--- a/evaluation/aider_bench/run_infer.py
+++ b/evaluation/aider_bench/run_infer.py
@@ -261,7 +261,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py
index 4535ccba4e4..a05a6aa83aa 100644
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -311,7 +311,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py
index adb498cd2eb..d81e873636c 100644
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -440,7 +440,7 @@ def execute_sql(db_path, sql):
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py
index c9fe2ebd18b..38c91cae18d 100644
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@@ -121,7 +121,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'query': instance.instruction,
             'action': last_delegate_action,
diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py
index c02cd0aee73..d72a8a449ea 100644
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -213,7 +213,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py
index 873cb7f8969..9881cea88c9 100644
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -121,7 +121,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'text': model_answer_raw,
             'correct': correct,
diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py
index 8fd4034c9d5..81f09ef5223 100644
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -302,7 +302,7 @@ def process_instance(
         metadata=metadata,
         history=state.history.compatibility_for_eval_history_pairs(),
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'result': test_result,
             'found_answers': found_answers,
diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py
index 25fee65561f..ae445faaa4a 100644
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -264,7 +264,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/integration_tests/run_infer.py b/evaluation/integration_tests/run_infer.py
index a530041f92f..95dbb447841 100644
--- a/evaluation/integration_tests/run_infer.py
+++ b/evaluation/integration_tests/run_infer.py
@@ -134,7 +134,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result.model_dump(),
     )
     return output
diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py
index 5b7d35f2113..57e0ec9582f 100644
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -256,7 +256,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py
index 9c2aaf1e096..58fc9da6cc1 100644
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -173,7 +173,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'reward': reward,
         },
diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py
index 8017b194d8d..554940ebaad 100644
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -212,7 +212,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'success': task_state.success if task_state else False,
         },
diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
index 9ac1e0cf663..8ca66056e65 100644
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -402,10 +402,10 @@ def process_instance(
 
         # if fatal error, throw EvalError to trigger re-run
         if (
-            state.last_error
-            and 'fatal error during agent execution' in state.last_error
+            state.get_last_error()
+            and 'fatal error during agent execution' in state.get_last_error()
         ):
-            raise EvalException('Fatal error detected: ' + state.last_error)
+            raise EvalException('Fatal error detected: ' + state.get_last_error())
 
         # ======= THIS IS SWE-Bench specific =======
         # Get git patch
@@ -442,7 +442,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
     )
     return output
 
diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py
old mode 100755
new mode 100644
diff --git a/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py b/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
old mode 100755
new mode 100644
diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py
old mode 100755
new mode 100644
diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py
old mode 100755
new mode 100644
diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py
index 5c2c5342278..c10d19fdf13 100644
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -149,7 +149,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
     )
     return output
 
diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py
index cfc2bdae493..d818f77307a 100644
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -187,7 +187,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'reward': reward,
         },
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 946f6a7d327..0e921508f9c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -133,21 +133,14 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def report_error(self, message: str, exception: Exception | None = None):
-        """Reports an error to the user and sends the exception to the LLM next step, in the hope it can self-correct.
-
-        This method should be called for a particular type of errors, which have:
-        - a user-friendly message, which will be shown in the chat box. This should not be a raw exception message.
-        - an ErrorObservation that can be sent to the LLM by the user role, with the exception message, so it can self-correct next time.
-        """
-        self.state.last_error = message
-        if exception:
-            self.state.last_error += f': {exception}'
+    async def send_error_to_event_stream(
+        self, message: str, exception: Exception | None = None
+    ):
         detail = str(exception) if exception is not None else ''
         if exception is not None and isinstance(exception, litellm.AuthenticationError):
-            detail = 'Please check your credentials. Is your API key correct?'
+            detail += '\nPlease check your credentials. Is your API key correct?'
         self.event_stream.add_event(
-            ErrorObservation(f'{message}:{detail}'), EventSource.USER
+            ErrorObservation(f'{message}:{detail}'), EventSource.AGENT
         )
 
     async def start_step_loop(self):
@@ -164,7 +157,7 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
-                await self.report_error(
+                await self.send_error_to_event_stream(
                     'There was an unexpected error while running the agent', exception=e
                 )
                 await self.set_agent_state_to(AgentState.ERROR)
@@ -254,9 +247,6 @@ async def _handle_observation(self, observation: Observation):
             if self.state.agent_state == AgentState.ERROR:
                 self.state.metrics.merge(self.state.local_metrics)
         elif isinstance(observation, FatalErrorObservation):
-            await self.report_error(
-                'There was a fatal error during agent execution: ' + str(observation)
-            )
             await self.set_agent_state_to(AgentState.ERROR)
             self.state.metrics.merge(self.state.local_metrics)
 
@@ -443,7 +433,7 @@ async def _step(self) -> None:
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
             # report to the user
             # and send the underlying exception to the LLM for self-correction
-            await self.report_error(str(e))
+            await self.send_error_to_event_stream(str(e))
             return
 
         if action.runnable:
@@ -468,9 +458,9 @@ async def _step(self) -> None:
         logger.info(action, extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            # This need to go BEFORE report_error to sync metrics
+            # This need to go BEFORE send_error_to_event_stream to sync metrics
             await self.set_agent_state_to(AgentState.ERROR)
-            await self.report_error('Agent got stuck in a loop')
+            await self.send_error_to_event_stream('Agent got stuck in a loop')
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -489,7 +479,9 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self.report_error('Delegator agent encountered an error')
+            await self.send_error_to_event_stream(
+                'Delegator agent encountered an error'
+            )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             logger.info(
                 f'[Agent Controller {self.id}] Delegate agent has finished execution'
@@ -538,17 +530,17 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                # This need to go BEFORE report_error to sync metrics
+                # This need to go BEFORE send_error_to_event_stream to sync metrics
                 await self.set_agent_state_to(AgentState.ERROR)
                 # set to ERROR state if running in headless mode
                 # since user cannot resume on the web interface
-                await self.report_error(
+                await self.send_error_to_event_stream(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
                 )
             else:
                 await self.set_agent_state_to(AgentState.PAUSED)
-                await self.report_error(
+                await self.send_error_to_event_stream(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                     f'{TRAFFIC_CONTROL_REMINDER}'
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index e14d44517a5..d204c9609ec 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -11,6 +11,7 @@
     MessageAction,
 )
 from openhands.events.action.agent import AgentFinishAction
+from openhands.events.observation import ErrorObservation, FatalErrorObservation
 from openhands.llm.metrics import Metrics
 from openhands.memory.history import ShortTermHistory
 from openhands.storage.files import FileStore
@@ -80,7 +81,6 @@ class State:
     history: ShortTermHistory = field(default_factory=ShortTermHistory)
     inputs: dict = field(default_factory=dict)
     outputs: dict = field(default_factory=dict)
-    last_error: str | None = None
     agent_state: AgentState = AgentState.LOADING
     resume_state: AgentState | None = None
     traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
@@ -124,9 +124,6 @@ def restore_from_session(sid: str, file_store: FileStore) -> 'State':
         else:
             state.resume_state = None
 
-        # don't carry last_error anymore after restore
-        state.last_error = None
-
         # first state after restore
         state.agent_state = AgentState.LOADING
         return state
@@ -157,6 +154,14 @@ def __setstate__(self, state):
 
         # remove the restored data from the state if any
 
+    def get_last_error(self) -> str:
+        for event in self.history.get_events(reverse=True):
+            if isinstance(event, ErrorObservation) or isinstance(
+                event, FatalErrorObservation
+            ):
+                return event.content
+        return ''
+
     def get_current_user_intent(self):
         """Returns the latest user message and image(if provided) that appears after a FinishAction, or the first (the task) if nothing was finished yet."""
         last_user_message = None

From ed3d94499ab15ab98e0863d19acdcbe140d83e19 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:31:40 -0400
Subject: [PATCH 04/60] revert perms

---
 evaluation/swe_bench/scripts/eval/compare_outputs.py         | 0
 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py | 0
 evaluation/swe_bench/scripts/eval/summarize_outputs.py       | 0
 evaluation/swe_bench/scripts/setup/compare_patch_filename.py | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/compare_outputs.py
 mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
 mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/summarize_outputs.py
 mode change 100644 => 100755 evaluation/swe_bench/scripts/setup/compare_patch_filename.py

diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py
old mode 100644
new mode 100755
diff --git a/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py b/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
old mode 100644
new mode 100755
diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py
old mode 100644
new mode 100755
diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py
old mode 100644
new mode 100755

From 57662b625819b47f7f1d5e4baf0c22261169279c Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:34:29 -0400
Subject: [PATCH 05/60] private method

---
 openhands/controller/agent_controller.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 0e921508f9c..11db85c33d7 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -133,7 +133,7 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def send_error_to_event_stream(
+    async def _send_error_to_event_stream(
         self, message: str, exception: Exception | None = None
     ):
         detail = str(exception) if exception is not None else ''
@@ -157,7 +157,7 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
-                await self.send_error_to_event_stream(
+                await self._send_error_to_event_stream(
                     'There was an unexpected error while running the agent', exception=e
                 )
                 await self.set_agent_state_to(AgentState.ERROR)
@@ -433,7 +433,7 @@ async def _step(self) -> None:
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
             # report to the user
             # and send the underlying exception to the LLM for self-correction
-            await self.send_error_to_event_stream(str(e))
+            await self._send_error_to_event_stream(str(e))
             return
 
         if action.runnable:
@@ -458,9 +458,9 @@ async def _step(self) -> None:
         logger.info(action, extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            # This need to go BEFORE send_error_to_event_stream to sync metrics
+            # This need to go BEFORE _send_error_to_event_stream to sync metrics
             await self.set_agent_state_to(AgentState.ERROR)
-            await self.send_error_to_event_stream('Agent got stuck in a loop')
+            await self._send_error_to_event_stream('Agent got stuck in a loop')
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -479,7 +479,7 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self.send_error_to_event_stream(
+            await self._send_error_to_event_stream(
                 'Delegator agent encountered an error'
             )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
@@ -530,17 +530,17 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                # This need to go BEFORE send_error_to_event_stream to sync metrics
+                # This need to go BEFORE _send_error_to_event_stream to sync metrics
                 await self.set_agent_state_to(AgentState.ERROR)
                 # set to ERROR state if running in headless mode
                 # since user cannot resume on the web interface
-                await self.send_error_to_event_stream(
+                await self._send_error_to_event_stream(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
                 )
             else:
                 await self.set_agent_state_to(AgentState.PAUSED)
-                await self.send_error_to_event_stream(
+                await self._send_error_to_event_stream(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                     f'{TRAFFIC_CONTROL_REMINDER}'

From a7ce34e2692e1cce0ebb61f4b4b77d148c481615 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:45:25 -0400
Subject: [PATCH 06/60] async_add_event

---
 openhands/controller/agent_controller.py | 53 +++++++++++++-----------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 11db85c33d7..9550286c316 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -133,13 +133,18 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def _send_error_to_event_stream(
-        self, message: str, exception: Exception | None = None
+    async def _react_to_error(
+        self,
+        message: str,
+        exception: Exception | None = None,
+        new_state: AgentState | None = None,
     ):
+        if new_state is not None:
+            await self.set_agent_state_to(new_state)
         detail = str(exception) if exception is not None else ''
         if exception is not None and isinstance(exception, litellm.AuthenticationError):
             detail += '\nPlease check your credentials. Is your API key correct?'
-        self.event_stream.add_event(
+        await self.event_stream.async_add_event(
             ErrorObservation(f'{message}:{detail}'), EventSource.AGENT
         )
 
@@ -157,10 +162,11 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
-                await self._send_error_to_event_stream(
-                    'There was an unexpected error while running the agent', exception=e
+                await self._react_to_error(
+                    'There was an unexpected error while running the agent',
+                    exception=e,
+                    new_state=AgentState.ERROR,
                 )
-                await self.set_agent_state_to(AgentState.ERROR)
                 break
 
             await asyncio.sleep(0.1)
@@ -320,7 +326,9 @@ async def set_agent_state_to(self, new_state: AgentState):
             else:
                 confirmation_state = ActionConfirmationStatus.REJECTED
             self._pending_action.confirmation_state = confirmation_state  # type: ignore[attr-defined]
-            self.event_stream.add_event(self._pending_action, EventSource.AGENT)
+            await self.event_stream.async_add_event(
+                self._pending_action, EventSource.AGENT
+            )
 
         self.state.agent_state = new_state
         self.event_stream.add_event(
@@ -433,7 +441,7 @@ async def _step(self) -> None:
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
             # report to the user
             # and send the underlying exception to the LLM for self-correction
-            await self._send_error_to_event_stream(str(e))
+            await self._react_to_error(str(e))
             return
 
         if action.runnable:
@@ -452,15 +460,15 @@ async def _step(self) -> None:
                 == ActionConfirmationStatus.AWAITING_CONFIRMATION
             ):
                 await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION)
-            self.event_stream.add_event(action, EventSource.AGENT)
+            await self.event_stream.async_add_event(action, EventSource.AGENT)
 
         await self.update_state_after_step()
         logger.info(action, extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            # This need to go BEFORE _send_error_to_event_stream to sync metrics
-            await self.set_agent_state_to(AgentState.ERROR)
-            await self._send_error_to_event_stream('Agent got stuck in a loop')
+            await self._react_to_error(
+                'Agent got stuck in a loop', new_state=AgentState.ERROR
+            )
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -479,9 +487,7 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._send_error_to_event_stream(
-                'Delegator agent encountered an error'
-            )
+            await self._react_to_error('Delegator agent encountered an error')
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             logger.info(
                 f'[Agent Controller {self.id}] Delegate agent has finished execution'
@@ -510,7 +516,7 @@ async def _delegate_step(self):
             # clean up delegate status
             self.delegate = None
             self.delegateAction = None
-            self.event_stream.add_event(obs, EventSource.AGENT)
+            await self.event_stream.async_add_event(obs, EventSource.AGENT)
         return
 
     async def _handle_traffic_control(
@@ -530,20 +536,17 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                # This need to go BEFORE _send_error_to_event_stream to sync metrics
-                await self.set_agent_state_to(AgentState.ERROR)
-                # set to ERROR state if running in headless mode
-                # since user cannot resume on the web interface
-                await self._send_error_to_event_stream(
+                await self._react_to_error(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
-                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
+                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}',
+                    new_state=AgentState.ERROR,
                 )
             else:
-                await self.set_agent_state_to(AgentState.PAUSED)
-                await self._send_error_to_event_stream(
+                await self._react_to_error(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
-                    f'{TRAFFIC_CONTROL_REMINDER}'
+                    f'{TRAFFIC_CONTROL_REMINDER}',
+                    new_state=AgentState.PAUSED,
                 )
             stop_step = True
         return stop_step

From d8becc23e39ea2a718858fae6bd312b12e5a873d Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:52:00 -0400
Subject: [PATCH 07/60] remove unnecessary return

---
 openhands/controller/agent_controller.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 9550286c316..22e466a8800 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -35,7 +35,6 @@
 from openhands.events.observation import (
     AgentDelegateObservation,
     AgentStateChangedObservation,
-    CmdOutputObservation,
     ErrorObservation,
     FatalErrorObservation,
     Observation,
@@ -245,9 +244,7 @@ async def _handle_observation(self, observation: Observation):
                 await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
             return
 
-        if isinstance(observation, CmdOutputObservation):
-            return
-        elif isinstance(observation, AgentDelegateObservation):
+        if isinstance(observation, AgentDelegateObservation):
             self.state.history.on_event(observation)
         elif isinstance(observation, ErrorObservation):
             if self.state.agent_state == AgentState.ERROR:

From d9811b00a6f941ccf2b9c29007ba1da56b1d541a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:53:02 -0400
Subject: [PATCH 08/60] stop dropping events while pending_action is set

---
 openhands/controller/agent_controller.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 22e466a8800..ab1e7b84caf 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -216,15 +216,6 @@ async def _handle_observation(self, observation: Observation):
         Args:
             observation (observation): The observation to handle.
         """
-        if (
-            self._pending_action
-            and hasattr(self._pending_action, 'confirmation_state')
-            and self._pending_action.confirmation_state
-            == ActionConfirmationStatus.AWAITING_CONFIRMATION
-        ):
-            return
-
-        # Make sure we print the observation in the same way as the LLM sees it
         observation_to_print = copy.deepcopy(observation)
         if len(observation_to_print.content) > self.agent.llm.config.max_message_chars:
             observation_to_print.content = truncate_content(
@@ -232,7 +223,6 @@ async def _handle_observation(self, observation: Observation):
             )
         logger.info(observation_to_print, extra={'msg_type': 'OBSERVATION'})
 
-        # Merge with the metrics from the LLM - it will to synced to the controller's local metrics in update_state_after_step()
         if observation.llm_metrics is not None:
             self.agent.llm.metrics.merge(observation.llm_metrics)
 

From 47a95f70fa7317a76de7810c5df5b90aac66acac Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:58:47 -0400
Subject: [PATCH 09/60] remove FatalErrorObservation

---
 openhands/controller/agent_controller.py      |  6 ++---
 openhands/controller/state/state.py           |  6 ++---
 openhands/events/observation/__init__.py      |  3 +--
 openhands/events/observation/error.py         | 15 +----------
 openhands/runtime/action_execution_server.py  |  3 +--
 .../impl/eventstream/eventstream_runtime.py   | 22 +++++++++-------
 .../runtime/impl/remote/remote_runtime.py     | 26 +++++++++++--------
 openhands/runtime/utils/bash.py               |  9 ++++---
 openhands/runtime/utils/edit.py               | 11 ++++----
 9 files changed, 46 insertions(+), 55 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index ab1e7b84caf..59f6417c40c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -36,7 +36,6 @@
     AgentDelegateObservation,
     AgentStateChangedObservation,
     ErrorObservation,
-    FatalErrorObservation,
     Observation,
 )
 from openhands.events.serialization.event import truncate_content
@@ -237,11 +236,10 @@ async def _handle_observation(self, observation: Observation):
         if isinstance(observation, AgentDelegateObservation):
             self.state.history.on_event(observation)
         elif isinstance(observation, ErrorObservation):
+            if observation.fatal:
+                await self.set_agent_state_to(AgentState.ERROR)
             if self.state.agent_state == AgentState.ERROR:
                 self.state.metrics.merge(self.state.local_metrics)
-        elif isinstance(observation, FatalErrorObservation):
-            await self.set_agent_state_to(AgentState.ERROR)
-            self.state.metrics.merge(self.state.local_metrics)
 
     async def _handle_message_action(self, action: MessageAction):
         """Handles message actions from the event stream.
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index d204c9609ec..2ef4ac20350 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -11,7 +11,7 @@
     MessageAction,
 )
 from openhands.events.action.agent import AgentFinishAction
-from openhands.events.observation import ErrorObservation, FatalErrorObservation
+from openhands.events.observation import ErrorObservation
 from openhands.llm.metrics import Metrics
 from openhands.memory.history import ShortTermHistory
 from openhands.storage.files import FileStore
@@ -156,9 +156,7 @@ def __setstate__(self, state):
 
     def get_last_error(self) -> str:
         for event in self.history.get_events(reverse=True):
-            if isinstance(event, ErrorObservation) or isinstance(
-                event, FatalErrorObservation
-            ):
+            if isinstance(event, ErrorObservation):
                 return event.content
         return ''
 
diff --git a/openhands/events/observation/__init__.py b/openhands/events/observation/__init__.py
index a0fad86dfb4..28525b09aab 100644
--- a/openhands/events/observation/__init__.py
+++ b/openhands/events/observation/__init__.py
@@ -6,7 +6,7 @@
 )
 from openhands.events.observation.delegate import AgentDelegateObservation
 from openhands.events.observation.empty import NullObservation
-from openhands.events.observation.error import ErrorObservation, FatalErrorObservation
+from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.files import (
     FileEditObservation,
     FileReadObservation,
@@ -26,7 +26,6 @@
     'FileWriteObservation',
     'FileEditObservation',
     'ErrorObservation',
-    'FatalErrorObservation',
     'AgentStateChangedObservation',
     'AgentDelegateObservation',
     'SuccessObservation',
diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py
index cfbb291eb0f..d10fa2b8b75 100644
--- a/openhands/events/observation/error.py
+++ b/openhands/events/observation/error.py
@@ -12,6 +12,7 @@ class ErrorObservation(Observation):
     E.g., Linter error after editing a file.
     """
 
+    fatal: bool = False
     observation: str = ObservationType.ERROR
 
     @property
@@ -20,17 +21,3 @@ def message(self) -> str:
 
     def __str__(self) -> str:
         return f'**ErrorObservation**\n{self.content}'
-
-
-@dataclass
-class FatalErrorObservation(Observation):
-    """This data class represents a fatal error encountered by the agent.
-
-    This is the type of error that LLM CANNOT recover from, and the agent controller should stop the execution and report the error to the user.
-    E.g., Remote runtime action execution failure: 503 Server Error: Service Unavailable for url OR 404 Not Found.
-    """
-
-    observation: str = ObservationType.ERROR
-
-    def __str__(self) -> str:
-        return f'**FatalErrorObservation**\n{self.content}'
diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py
index 2da1372532d..b9f37b6dada 100644
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -37,7 +37,6 @@
 from openhands.events.observation import (
     CmdOutputObservation,
     ErrorObservation,
-    FatalErrorObservation,
     FileReadObservation,
     FileWriteObservation,
     IPythonRunCellObservation,
@@ -167,7 +166,7 @@ async def run_action(self, action) -> Observation:
 
     async def run(
         self, action: CmdRunAction
-    ) -> CmdOutputObservation | FatalErrorObservation:
+    ) -> CmdOutputObservation | ErrorObservation:
         return self.bash_session.run(action)
 
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 1cf40969225..60f9231a723 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -25,7 +25,7 @@
 )
 from openhands.events.action.action import Action
 from openhands.events.observation import (
-    FatalErrorObservation,
+    ErrorObservation,
     NullObservation,
     Observation,
     UserRejectObservation,
@@ -452,10 +452,13 @@ def run_action(self, action: Action) -> Observation:
                 return NullObservation('')
             action_type = action.action  # type: ignore[attr-defined]
             if action_type not in ACTION_TYPE_TO_CLASS:
-                return FatalErrorObservation(f'Action {action_type} does not exist.')
+                return ErrorObservation(
+                    f'Action {action_type} does not exist.', fatal=True
+                )
             if not hasattr(self, action_type):
-                return FatalErrorObservation(
-                    f'Action {action_type} is not supported in the current runtime.'
+                return ErrorObservation(
+                    f'Action {action_type} is not supported in the current runtime.',
+                    fatal=True,
                 )
             if (
                 getattr(action, 'confirmation_state', None)
@@ -486,17 +489,18 @@ def run_action(self, action: Action) -> Observation:
                     logger.debug(f'response: {response}')
                     error_message = response.text
                     logger.error(f'Error from server: {error_message}')
-                    obs = FatalErrorObservation(
-                        f'Action execution failed: {error_message}'
+                    obs = ErrorObservation(
+                        f'Action execution failed: {error_message}', fatal=True
                     )
             except requests.Timeout:
                 logger.error('No response received within the timeout period.')
-                obs = FatalErrorObservation(
-                    f'Action execution timed out after {action.timeout} seconds.'
+                obs = ErrorObservation(
+                    f'Action execution timed out after {action.timeout} seconds.',
+                    fatal=True,
                 )
             except Exception as e:
                 logger.error(f'Error during action execution: {e}')
-                obs = FatalErrorObservation(f'Action execution failed: {str(e)}')
+                obs = ErrorObservation(f'Action execution failed: {str(e)}', fatal=True)
             self._refresh_logs()
             return obs
 
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 8c25401af9c..f921d39e432 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -22,7 +22,7 @@
 )
 from openhands.events.action.action import Action
 from openhands.events.observation import (
-    FatalErrorObservation,
+    ErrorObservation,
     NullObservation,
     Observation,
 )
@@ -351,12 +351,14 @@ def run_action(self, action: Action) -> Observation:
                 return NullObservation('')
             action_type = action.action  # type: ignore[attr-defined]
             if action_type not in ACTION_TYPE_TO_CLASS:
-                return FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.'
+                return ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.',
+                    fatal=True,
                 )
             if not hasattr(self, action_type):
-                return FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.'
+                return ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.',
+                    fatal=True,
                 )
 
             assert action.timeout is not None
@@ -379,18 +381,20 @@ def run_action(self, action: Action) -> Observation:
                 else:
                     error_message = response.text
                     logger.error(f'Error from server: {error_message}')
-                    obs = FatalErrorObservation(
-                        f'Action execution failed: {error_message}'
+                    obs = ErrorObservation(
+                        f'Action execution failed: {error_message}', fatal=True
                     )
             except Timeout:
                 logger.error('No response received within the timeout period.')
-                obs = FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action execution timed out'
+                obs = ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action execution timed out',
+                    fatal=True,
                 )
             except Exception as e:
                 logger.error(f'Error during action execution: {e}')
-                obs = FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}'
+                obs = ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}',
+                    fatal=True,
                 )
             return obs
 
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index fba16787c6d..98e3ad9c09b 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -9,7 +9,7 @@
 from openhands.events.event import EventSource
 from openhands.events.observation import (
     CmdOutputObservation,
-    FatalErrorObservation,
+    ErrorObservation,
 )
 
 SOFT_TIMEOUT_SECONDS = 5
@@ -275,7 +275,7 @@ def _continue_bash(
                 output += '\r\n' + bash_prompt
         return output, exit_code
 
-    def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservation:
+    def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
         try:
             assert (
                 action.timeout is not None
@@ -329,6 +329,7 @@ def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservat
                 interpreter_details=python_interpreter,
             )
         except UnicodeDecodeError as e:
-            return FatalErrorObservation(
-                f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}'
+            return ErrorObservation(
+                f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}',
+                fatal=True,
             )
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 4ed5c0edafc..39b5cbbce65 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -13,7 +13,6 @@
 )
 from openhands.events.observation import (
     ErrorObservation,
-    FatalErrorObservation,
     FileEditObservation,
     FileReadObservation,
     FileWriteObservation,
@@ -214,8 +213,9 @@ def edit(self, action: FileEditAction) -> Observation:
             if isinstance(obs, ErrorObservation):
                 return obs
             if not isinstance(obs, FileWriteObservation):
-                return FatalErrorObservation(
-                    f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}'
+                return ErrorObservation(
+                    f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}',
+                    fatal=True,
                 )
             return FileEditObservation(
                 content=get_diff('', action.content, action.path),
@@ -225,8 +225,9 @@ def edit(self, action: FileEditAction) -> Observation:
                 new_content=action.content,
             )
         if not isinstance(obs, FileReadObservation):
-            return FatalErrorObservation(
-                f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}'
+            return ErrorObservation(
+                f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}',
+                fatal=True,
             )
 
         original_file_content = obs.content

From d46b71272d3650792ec3e975e81735b8e10977cc Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 00:04:49 -0400
Subject: [PATCH 10/60] refactor react_to_error a bit more

---
 openhands/controller/agent_controller.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 59f6417c40c..087b029aa8f 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -134,16 +134,13 @@ async def update_state_after_step(self):
     async def _react_to_error(
         self,
         message: str,
-        exception: Exception | None = None,
         new_state: AgentState | None = None,
     ):
         if new_state is not None:
+            # it's important to set the state before adding the error event, so that metrics sync properly
             await self.set_agent_state_to(new_state)
-        detail = str(exception) if exception is not None else ''
-        if exception is not None and isinstance(exception, litellm.AuthenticationError):
-            detail += '\nPlease check your credentials. Is your API key correct?'
         await self.event_stream.async_add_event(
-            ErrorObservation(f'{message}:{detail}'), EventSource.AGENT
+            ErrorObservation(message), EventSource.AGENT
         )
 
     async def start_step_loop(self):
@@ -160,9 +157,13 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
+                detail = str(e)
+                if isinstance(e, litellm.AuthenticationError):
+                    detail += (
+                        '\nPlease check your credentials. Is your API key correct?'
+                    )
                 await self._react_to_error(
-                    'There was an unexpected error while running the agent',
-                    exception=e,
+                    detail,
                     new_state=AgentState.ERROR,
                 )
                 break

From f9c283ea9bc084c3a50d2b688953886db288cdaf Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 00:09:03 -0400
Subject: [PATCH 11/60] fix comments

---
 openhands/controller/agent_controller.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 087b029aa8f..a51620f1d2c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -425,9 +425,9 @@ async def _step(self) -> None:
             if action is None:
                 raise LLMNoActionError('No action was returned')
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
-            # report to the user
-            # and send the underlying exception to the LLM for self-correction
-            await self._react_to_error(str(e))
+            await self._react_to_error(
+                str(e), new_state=None
+            )  # don't change state, LLM can correct itself
             return
 
         if action.runnable:
@@ -473,7 +473,9 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._react_to_error('Delegator agent encountered an error')
+            await self._react_to_error(
+                'Delegator agent encountered an error', new_state=None
+            )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             logger.info(
                 f'[Agent Controller {self.id}] Delegate agent has finished execution'

From 1c44fa6807ab21c98807f53838ddd82914c8dd8f Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 00:45:06 -0400
Subject: [PATCH 12/60] fix test

---
 tests/unit/test_is_stuck.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 4a133075216..d42fc7392e4 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -440,9 +440,9 @@ def test_is_stuck_repeating_action_observation_pattern(
         read_observation_2._cause = read_action_2._id
         event_stream.add_event(read_observation_2, EventSource.USER)
 
-        # one more message to break the pattern
-        message_null_observation = NullObservation(content='')
+        message_action = MessageAction(content='Come on', wait_for_response=False)
         event_stream.add_event(message_action, EventSource.USER)
+        message_null_observation = NullObservation(content='')
         event_stream.add_event(message_null_observation, EventSource.USER)
 
         cmd_action_3 = CmdRunAction(command='ls')

From 3f67ed38d0d059da0348011733af34a72b2b8b3a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 01:01:30 -0400
Subject: [PATCH 13/60] fix test

---
 tests/unit/test_agent_controller.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index fbaa225c907..1aee80666c6 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -87,7 +87,7 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream)
 
 
 @pytest.mark.asyncio
-async def test_report_error(mock_agent, mock_event_stream):
+async def test__react_to_error(mock_agent, mock_event_stream):
     controller = AgentController(
         agent=mock_agent,
         event_stream=mock_event_stream,
@@ -97,7 +97,7 @@ async def test_report_error(mock_agent, mock_event_stream):
         headless_mode=True,
     )
     error_message = 'Test error'
-    await controller.report_error(error_message)
+    await controller._react_to_error(error_message)
     assert controller.state.last_error == error_message
     controller.event_stream.add_event.assert_called_once()
     await controller.close()
@@ -114,12 +114,12 @@ async def test_step_with_exception(mock_agent, mock_event_stream):
         headless_mode=True,
     )
     controller.state.agent_state = AgentState.RUNNING
-    controller.report_error = AsyncMock()
+    controller._react_to_error = AsyncMock()
     controller.agent.step.side_effect = LLMMalformedActionError('Malformed action')
     await controller._step()
 
-    # Verify that report_error was called with the correct error message
-    controller.report_error.assert_called_once_with('Malformed action')
+    # Verify that _react_to_error was called with the correct error message
+    controller._react_to_error.assert_called_once_with('Malformed action')
     await controller.close()
 
 

From 6aff3818c60920d5ffa479b115088f3cceed25ba Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sun, 27 Oct 2024 04:59:55 +0100
Subject: [PATCH 14/60] Update openhands/controller/state/state.py

Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
---
 openhands/controller/state/state.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index 2ef4ac20350..0a84d821a6d 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -157,7 +157,7 @@ def __setstate__(self, state):
     def get_last_error(self) -> str:
         for event in self.history.get_events(reverse=True):
             if isinstance(event, ErrorObservation):
-                return event.content
+                return event.content if not event.fatal else 'There was a fatal error during agent execution: ' + event.content
         return ''
 
     def get_current_user_intent(self):

From b278d45f0829125c5d1e608c8bb6f15b4a694d1b Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Tue, 29 Oct 2024 11:22:18 +0100
Subject: [PATCH 15/60] more invisible conflicts

---
 tests/unit/test_agent_controller.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 6d0fe07a748..e6de80d274b 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -14,7 +14,6 @@
 from openhands.events.action import ChangeAgentStateAction, CmdRunAction, MessageAction
 from openhands.events.observation import (
     ErrorObservation,
-    FatalErrorObservation,
 )
 from openhands.events.serialization import event_to_dict
 from openhands.llm import LLM
@@ -148,7 +147,7 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream):
     agent.llm.metrics = Metrics()
     agent.llm.config = config.get_llm_config()
 
-    fatal_error_obs = FatalErrorObservation('Fatal error detected')
+    fatal_error_obs = ErrorObservation('Fatal error detected', fatal=True)
     fatal_error_obs._cause = event.id
 
     runtime = MagicMock(spec=Runtime)
@@ -175,8 +174,8 @@ async def on_event(event: Event):
     # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
     assert (
-        state.last_error
-        == 'There was a fatal error during agent execution: **FatalErrorObservation**\nFatal error detected'
+        state.get_last_error()
+        == 'There was a fatal error during agent execution: **ErrorObservation**\nFatal error detected'
     )
     assert len(list(event_stream.get_events())) == 5
 

From d19f998270702147ea795820a31383a082c8adbd Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 13:10:04 -0400
Subject: [PATCH 16/60] remove fatal observations

---
 openhands/controller/agent_controller.py                  | 2 --
 openhands/controller/state/state.py                       | 7 +------
 openhands/events/observation/error.py                     | 1 -
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 8 +++-----
 openhands/runtime/impl/remote/remote_runtime.py           | 8 +-------
 openhands/runtime/utils/bash.py                           | 1 -
 openhands/runtime/utils/edit.py                           | 6 ++----
 7 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index b6991c9ea92..078d145e78e 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -246,8 +246,6 @@ async def _handle_observation(self, observation: Observation):
         if isinstance(observation, AgentDelegateObservation):
             self.state.history.on_event(observation)
         elif isinstance(observation, ErrorObservation):
-            if observation.fatal:
-                await self.set_agent_state_to(AgentState.ERROR)
             if self.state.agent_state == AgentState.ERROR:
                 self.state.metrics.merge(self.state.local_metrics)
 
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index 50ba265c6e3..db3b3638a8c 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -157,12 +157,7 @@ def __setstate__(self, state):
     def get_last_error(self) -> str:
         for event in self.history.get_events(reverse=True):
             if isinstance(event, ErrorObservation):
-                return (
-                    event.content
-                    if not event.fatal
-                    else 'There was a fatal error during agent execution: '
-                    + event.content
-                )
+                return event.content
         return ''
 
     def get_current_user_intent(self):
diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py
index d10fa2b8b75..c6139e86c0b 100644
--- a/openhands/events/observation/error.py
+++ b/openhands/events/observation/error.py
@@ -12,7 +12,6 @@ class ErrorObservation(Observation):
     E.g., Linter error after editing a file.
     """
 
-    fatal: bool = False
     observation: str = ObservationType.ERROR
 
     @property
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index be4c415ec05..aac4c9e5770 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -466,12 +466,11 @@ def run_action(self, action: Action) -> Observation:
             action_type = action.action  # type: ignore[attr-defined]
             if action_type not in ACTION_TYPE_TO_CLASS:
                 return ErrorObservation(
-                    f'Action {action_type} does not exist.', fatal=True
+                    f'Action {action_type} does not exist.',
                 )
             if not hasattr(self, action_type):
                 return ErrorObservation(
                     f'Action {action_type} is not supported in the current runtime.',
-                    fatal=True,
                 )
             if (
                 getattr(action, 'confirmation_state', None)
@@ -503,17 +502,16 @@ def run_action(self, action: Action) -> Observation:
                     error_message = response.text
                     self.log('error', f'Error from server: {error_message}')
                     obs = ErrorObservation(
-                        f'Action execution failed: {error_message}', fatal=True
+                        f'Action execution failed: {error_message}',
                     )
             except requests.Timeout:
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'Action execution timed out after {action.timeout} seconds.',
-                    fatal=True,
                 )
             except Exception as e:
                 self.log('error', f'Error during action execution: {e}')
-                obs = ErrorObservation(f'Action execution failed: {str(e)}', fatal=True)
+                obs = ErrorObservation(f'Action execution failed: {str(e)}')
             self._refresh_logs()
             return obs
 
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 4d6e243be88..1169f7ee7a6 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -363,12 +363,10 @@ def run_action(self, action: Action) -> Observation:
             if action_type not in ACTION_TYPE_TO_CLASS:
                 return ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.',
-                    fatal=True,
                 )
             if not hasattr(self, action_type):
                 return ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.',
-                    fatal=True,
                 )
 
             assert action.timeout is not None
@@ -391,20 +389,16 @@ def run_action(self, action: Action) -> Observation:
                 else:
                     error_message = response.text
                     self.log('error', f'Error from server: {error_message}')
-                    obs = ErrorObservation(
-                        f'Action execution failed: {error_message}', fatal=True
-                    )
+                    obs = ErrorObservation(f'Action execution failed: {error_message}')
             except Timeout:
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action execution timed out',
-                    fatal=True,
                 )
             except Exception as e:
                 self.log('error', f'Error during action execution: {e}')
                 obs = ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}',
-                    fatal=True,
                 )
             return obs
 
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index 98e3ad9c09b..a5019315a03 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -331,5 +331,4 @@ def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
         except UnicodeDecodeError as e:
             return ErrorObservation(
                 f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}',
-                fatal=True,
             )
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 478da2defc4..fe08f87b2e4 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -214,8 +214,7 @@ def edit(self, action: FileEditAction) -> Observation:
                 return obs
             if not isinstance(obs, FileWriteObservation):
                 return ErrorObservation(
-                    f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}',
-                    fatal=True,
+                    f'Expected FileWriteObservation, got {type(obs)}: {str(obs)}',
                 )
             return FileEditObservation(
                 content=get_diff('', action.content, action.path),
@@ -226,8 +225,7 @@ def edit(self, action: FileEditAction) -> Observation:
             )
         if not isinstance(obs, FileReadObservation):
             return ErrorObservation(
-                f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}',
-                fatal=True,
+                f'Expected FileReadObservation, got {type(obs)}: {str(obs)}',
             )
 
         original_file_content = obs.content

From 4745cb6f135f1461407c4dcfd760f7570166a2a2 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 13:15:26 -0400
Subject: [PATCH 17/60] more refactoring

---
 openhands/controller/agent_controller.py | 38 ++++++++++--------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 078d145e78e..8961f5a4b4e 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -3,8 +3,6 @@
 import traceback
 from typing import Type
 
-import litellm
-
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State, TrafficControlState
 from openhands.controller.stuck import StuckDetector
@@ -140,17 +138,15 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def _react_to_error(
+    async def _react_to_exception(
         self,
-        message: str,
+        e: Exception,
         new_state: AgentState | None = None,
     ):
         if new_state is not None:
             # it's important to set the state before adding the error event, so that metrics sync properly
             await self.set_agent_state_to(new_state)
-        await self.event_stream.async_add_event(
-            ErrorObservation(message), EventSource.AGENT
-        )
+        raise e
 
     async def start_step_loop(self):
         """The main loop for the agent's step-by-step execution."""
@@ -165,14 +161,8 @@ async def start_step_loop(self):
             except Exception as e:
                 traceback.print_exc()
                 self.log('error', f'Error while running the agent: {e}')
-                self.log('error', traceback.format_exc())
-                detail = str(e)
-                if isinstance(e, litellm.AuthenticationError):
-                    detail += (
-                        '\nPlease check your credentials. Is your API key correct?'
-                    )
-                await self._react_to_error(
-                    detail,
+                await self._react_to_exception(
+                    exception=e,
                     new_state=AgentState.ERROR,
                 )
                 break
@@ -437,9 +427,13 @@ async def _step(self) -> None:
             if action is None:
                 raise LLMNoActionError('No action was returned')
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
-            await self._react_to_error(
-                str(e), new_state=None
-            )  # don't change state, LLM can correct itself
+            await self.event_stream.async_add_event(
+                ErrorObservation(
+                    content=str(e),
+                    cause=action.id,
+                    agent_state=self.get_agent_state(),
+                )
+            )
             return
 
         if action.runnable:
@@ -465,7 +459,7 @@ async def _step(self) -> None:
         self.log('debug', str(action), extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            await self._react_to_error(
+            await self._react_to_exception(
                 'Agent got stuck in a loop', new_state=AgentState.ERROR
             )
 
@@ -486,7 +480,7 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._react_to_error(
+            await self._react_to_exception(
                 'Delegator agent encountered an error', new_state=None
             )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
@@ -537,13 +531,13 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                await self._react_to_error(
+                await self._react_to_exception(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}',
                     new_state=AgentState.ERROR,
                 )
             else:
-                await self._react_to_error(
+                await self._react_to_exception(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                     f'{TRAFFIC_CONTROL_REMINDER}',

From d7fe86d4f31f6d737192e795ab664030b6df116d Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 13:30:48 -0400
Subject: [PATCH 18/60] new react_to_exception

---
 openhands/controller/agent_controller.py | 35 +++++++++++-------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8961f5a4b4e..4cedc1159c9 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -141,7 +141,7 @@ async def update_state_after_step(self):
     async def _react_to_exception(
         self,
         e: Exception,
-        new_state: AgentState | None = None,
+        new_state: AgentState = AgentState.ERROR,
     ):
         if new_state is not None:
             # it's important to set the state before adding the error event, so that metrics sync properly
@@ -161,10 +161,7 @@ async def start_step_loop(self):
             except Exception as e:
                 traceback.print_exc()
                 self.log('error', f'Error while running the agent: {e}')
-                await self._react_to_exception(
-                    exception=e,
-                    new_state=AgentState.ERROR,
-                )
+                await self._react_to_exception(e)
                 break
 
             await asyncio.sleep(0.1)
@@ -430,9 +427,8 @@ async def _step(self) -> None:
             await self.event_stream.async_add_event(
                 ErrorObservation(
                     content=str(e),
-                    cause=action.id,
-                    agent_state=self.get_agent_state(),
-                )
+                ),
+                EventSource.AGENT,
             )
             return
 
@@ -459,9 +455,7 @@ async def _step(self) -> None:
         self.log('debug', str(action), extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            await self._react_to_exception(
-                'Agent got stuck in a loop', new_state=AgentState.ERROR
-            )
+            await self._react_to_exception(RuntimeError('Agent got stuck in a loop'))
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -480,8 +474,9 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._react_to_exception(
-                'Delegator agent encountered an error', new_state=None
+            await self.event_stream.async_add_event(
+                ErrorObservation('Delegate agent encountered an error'),
+                EventSource.AGENT,
             )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             self.log('debug', 'Delegate agent has finished execution')
@@ -531,16 +526,18 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                await self._react_to_exception(
-                    f'Agent reached maximum {limit_type} in headless mode, task stopped. '
-                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}',
-                    new_state=AgentState.ERROR,
+                e = RuntimeError(
+                    f'Agent reached maximum {limit_type} in headless mode. '
+                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
                 )
+                await self._react_to_exception(e)
             else:
-                await self._react_to_exception(
+                e = RuntimeError(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
-                    f'{TRAFFIC_CONTROL_REMINDER}',
+                )
+                await self._react_to_exception(
+                    e,
                     new_state=AgentState.PAUSED,
                 )
             stop_step = True

From d240775fb0999cb28bce2d70f293aa205ba23d35 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 14:00:33 -0400
Subject: [PATCH 19/60] change up status message plumbing

---
 frontend/src/services/actions.ts              | 10 ++++++----
 frontend/src/types/Message.tsx                | 10 ++++------
 openhands/controller/agent_controller.py      | 12 +++++++++--
 openhands/runtime/base.py                     | 19 ++++++++++++++++--
 openhands/runtime/impl/e2b/e2b_runtime.py     |  4 ++--
 .../impl/eventstream/eventstream_runtime.py   | 15 +++++---------
 openhands/runtime/impl/modal/modal_runtime.py |  6 +++---
 .../runtime/impl/remote/remote_runtime.py     |  9 ++-------
 openhands/server/session/agent_session.py     | 18 ++++++++---------
 openhands/server/session/session.py           | 20 +++++++++++++------
 10 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 46b6aad8513..9d215dcb146 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -119,11 +119,11 @@ export function handleActionMessage(message: ActionMessage) {
 }
 
 export function handleStatusMessage(message: StatusMessage) {
-  const msg = message.status == null ? "" : message.status.trim();
+  const msg_id = message.id;
   store.dispatch(
     setCurStatusMessage({
       ...message,
-      status: msg,
+      status: msg_id,
     }),
   );
 }
@@ -139,9 +139,11 @@ export function handleAssistantMessage(data: string | SocketMessage) {
 
   if ("action" in socketMessage) {
     handleActionMessage(socketMessage);
-  } else if ("status" in socketMessage) {
+  } else if ("observation" in socketMessage) {
+    handleObservationMessage(socketMessage);
+  } else if ("status_update" in socketMessage) {
     handleStatusMessage(socketMessage);
   } else {
-    handleObservationMessage(socketMessage);
+    console.error("Unknown message type", socketMessage);
   }
 }
diff --git a/frontend/src/types/Message.tsx b/frontend/src/types/Message.tsx
index d4d365d5904..85b1d970641 100644
--- a/frontend/src/types/Message.tsx
+++ b/frontend/src/types/Message.tsx
@@ -33,10 +33,8 @@ export interface ObservationMessage {
 }
 
 export interface StatusMessage {
-  // TODO not implemented yet
-  // Whether the status is an error, default is false
-  is_error: boolean;
-
-  // A status message to display to the user
-  status: string;
+  status_update: true;
+  type: string;
+  id: string;
+  message: string;
 }
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4cedc1159c9..4e430c05ee9 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -1,7 +1,9 @@
 import asyncio
 import copy
 import traceback
-from typing import Type
+from typing import Callable, Optional, Type
+
+import litellm
 
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State, TrafficControlState
@@ -73,6 +75,7 @@ def __init__(
         initial_state: State | None = None,
         is_delegate: bool = False,
         headless_mode: bool = True,
+        status_callback: Optional[Callable] = None,
     ):
         """Initializes a new instance of the AgentController class.
 
@@ -115,6 +118,7 @@ def __init__(
 
         # stuck helper
         self._stuck_detector = StuckDetector(self.state)
+        self._status_callback = status_callback
 
     async def close(self):
         """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream."""
@@ -146,7 +150,11 @@ async def _react_to_exception(
         if new_state is not None:
             # it's important to set the state before adding the error event, so that metrics sync properly
             await self.set_agent_state_to(new_state)
-        raise e
+        if self._status_callback is not None:
+            err_id = ''
+            if isinstance(e, litellm.AuthenticationError):
+                err_id = 'llm_authentication_error'
+            await self._status_callback('error', err_id, str(e))
 
     async def start_step_loop(self):
         """The main loop for the agent's step-by-step execution."""
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 474ba741a47..1b59376924a 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -31,6 +31,14 @@
 from openhands.runtime.utils.edit import FileEditRuntimeMixin
 from openhands.utils.async_utils import call_sync_from_async
 
+STATUS_MESSAGES = {
+    'STATUS$STARTING_RUNTIME': 'Starting runtime...',
+    'STATUS$STARTING_CONTAINER': 'Starting container...',
+    'STATUS$PREPARING_CONTAINER': 'Preparing container...',
+    'STATUS$CONTAINER_STARTED': 'Container started.',
+    'STATUS$WAITING_FOR_CLIENT': 'Waiting for client...',
+}
+
 
 def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
     ret = {}
@@ -62,14 +70,14 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         self.sid = sid
         self.event_stream = event_stream
         self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event)
         self.plugins = plugins if plugins is not None and len(plugins) > 0 else []
-        self.status_message_callback = status_message_callback
+        self.status_callback = status_callback
         self.attach_to_existing = attach_to_existing
 
         self.config = copy.deepcopy(config)
@@ -97,6 +105,13 @@ def log(self, level: str, message: str) -> None:
         message = f'[runtime {self.sid}] {message}'
         getattr(logger, level)(message)
 
+    def send_status_message(self, message_id: str):
+        """Sends a status message if the callback function was provided."""
+        print('SEND STATUS', self.status_callback)
+        if self.status_callback:
+            msg = STATUS_MESSAGES.get(message_id, '')
+            self.status_callback('status', message_id, msg)
+
     # ====================================================================
 
     def add_env_vars(self, env_vars: dict[str, str]) -> None:
diff --git a/openhands/runtime/impl/e2b/e2b_runtime.py b/openhands/runtime/impl/e2b/e2b_runtime.py
index b5233574f0b..7c9c297f424 100644
--- a/openhands/runtime/impl/e2b/e2b_runtime.py
+++ b/openhands/runtime/impl/e2b/e2b_runtime.py
@@ -27,14 +27,14 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         sandbox: E2BSandbox | None = None,
-        status_message_callback: Optional[Callable] = None,
+        status_callback: Optional[Callable] = None,
     ):
         super().__init__(
             config,
             event_stream,
             sid,
             plugins,
-            status_message_callback=status_message_callback,
+            status_callback=status_callback,
         )
         if sandbox is None:
             self.sandbox = E2BSandbox()
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index aac4c9e5770..17814cf67ff 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -123,7 +123,7 @@ def init_base_runtime(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         super().__init__(
@@ -132,7 +132,7 @@ def init_base_runtime(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
@@ -143,7 +143,7 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         self.config = config
@@ -151,7 +151,7 @@ def __init__(
         self._container_port = 30001  # initial dummy value
         self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}'
         self.session = requests.Session()
-        self.status_message_callback = status_message_callback
+        self.status_callback = status_callback
 
         self.docker_client: docker.DockerClient = self._init_docker_client()
         self.base_container_image = self.config.sandbox.base_container_image
@@ -181,7 +181,7 @@ def __init__(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
@@ -664,8 +664,3 @@ def _find_available_port(self, max_attempts=5):
                 return port
         # If no port is found after max_attempts, return the last tried port
         return port
-
-    def send_status_message(self, message: str):
-        """Sends a status message if the callback function was provided."""
-        if self.status_message_callback:
-            self.status_message_callback(message)
diff --git a/openhands/runtime/impl/modal/modal_runtime.py b/openhands/runtime/impl/modal/modal_runtime.py
index 3a484c43e69..0e598a437f4 100644
--- a/openhands/runtime/impl/modal/modal_runtime.py
+++ b/openhands/runtime/impl/modal/modal_runtime.py
@@ -75,7 +75,7 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         assert config.modal_api_token_id, 'Modal API token id is required'
@@ -102,7 +102,7 @@ def __init__(
         self.container_port = 3000
 
         self.session = requests.Session()
-        self.status_message_callback = status_message_callback
+        self.status_callback = status_callback
         self.base_container_image_id = self.config.sandbox.base_container_image
         self.runtime_container_image_id = self.config.sandbox.runtime_container_image
         self.action_semaphore = threading.Semaphore(1)  # Ensure one action at a time
@@ -122,7 +122,7 @@ def __init__(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 1169f7ee7a6..a628db49654 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -51,7 +51,7 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Optional[Callable] = None,
+        status_callback: Optional[Callable] = None,
         attach_to_existing: bool = False,
     ):
         super().__init__(
@@ -60,7 +60,7 @@ def __init__(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
@@ -539,8 +539,3 @@ def copy_from(self, path: str) -> bytes:
             raise RuntimeError(
                 f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}'
             )
-
-    def send_status_message(self, message: str):
-        """Sends a status message if the callback function was provided."""
-        if self.status_message_callback:
-            self.status_message_callback(message)
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index 41ee9688916..b7fd08ba823 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -32,7 +32,12 @@ class AgentSession:
     _closed: bool = False
     loop: asyncio.AbstractEventLoop | None = None
 
-    def __init__(self, sid: str, file_store: FileStore):
+    def __init__(
+        self,
+        sid: str,
+        file_store: FileStore,
+        status_callback: Optional[Callable] = None,
+    ):
         """Initializes a new instance of the Session class
 
         Parameters:
@@ -43,6 +48,7 @@ def __init__(self, sid: str, file_store: FileStore):
         self.sid = sid
         self.event_stream = EventStream(sid, file_store)
         self.file_store = file_store
+        self._status_callback = status_callback
 
     async def start(
         self,
@@ -53,7 +59,6 @@ async def start(
         max_budget_per_task: float | None = None,
         agent_to_llm_config: dict[str, LLMConfig] | None = None,
         agent_configs: dict[str, AgentConfig] | None = None,
-        status_message_callback: Optional[Callable] = None,
     ):
         """Starts the Agent session
         Parameters:
@@ -80,7 +85,6 @@ async def start(
             max_budget_per_task,
             agent_to_llm_config,
             agent_configs,
-            status_message_callback,
         )
 
     def _start_thread(self, *args):
@@ -99,7 +103,6 @@ async def _start(
         max_budget_per_task: float | None = None,
         agent_to_llm_config: dict[str, LLMConfig] | None = None,
         agent_configs: dict[str, AgentConfig] | None = None,
-        status_message_callback: Optional[Callable] = None,
     ):
         self.loop = asyncio.get_running_loop()
         self._create_security_analyzer(config.security.security_analyzer)
@@ -107,7 +110,6 @@ async def _start(
             runtime_name=runtime_name,
             config=config,
             agent=agent,
-            status_message_callback=status_message_callback,
         )
         self._create_controller(
             agent,
@@ -161,7 +163,6 @@ async def _create_runtime(
         runtime_name: str,
         config: AppConfig,
         agent: Agent,
-        status_message_callback: Optional[Callable] = None,
     ):
         """Creates a runtime instance
 
@@ -181,7 +182,7 @@ async def _create_runtime(
             event_stream=self.event_stream,
             sid=self.sid,
             plugins=agent.sandbox_plugins,
-            status_message_callback=status_message_callback,
+            status_callback=self._status_callback,
         )
 
         try:
@@ -251,9 +252,8 @@ def _create_controller(
             agent_to_llm_config=agent_to_llm_config,
             agent_configs=agent_configs,
             confirmation_mode=confirmation_mode,
-            # AgentSession is designed to communicate with the frontend, so we don't want to
-            # run the agent in headless mode.
             headless_mode=False,
+            status_callback=self._status_callback,
         )
         try:
             agent_state = State.restore_from_session(self.sid, self.file_store)
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 4e6119a1856..b83f09b2493 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -42,7 +42,9 @@ def __init__(
         self.sid = sid
         self.websocket = ws
         self.last_active_ts = int(time.time())
-        self.agent_session = AgentSession(sid, file_store)
+        self.agent_session = AgentSession(
+            sid, file_store, status_callback=self.queue_status_message
+        )
         self.agent_session.event_stream.subscribe(
             EventStreamSubscriber.SERVER, self.on_event
         )
@@ -116,7 +118,6 @@ async def _initialize_agent(self, data: dict):
                 max_budget_per_task=self.config.max_budget_per_task,
                 agent_to_llm_config=self.config.get_agent_to_llm_config_map(),
                 agent_configs=self.config.get_agent_configs(),
-                status_message_callback=self.queue_status_message,
             )
         except Exception as e:
             logger.exception(f'Error creating controller: {e}')
@@ -177,6 +178,7 @@ async def send(self, data: dict[str, object]) -> bool:
         try:
             if self.websocket is None or not self.is_alive:
                 return False
+            print('SEND JSON', data)
             await self.websocket.send_json(data)
             await asyncio.sleep(0.001)  # This flushes the data to the client
             self.last_active_ts = int(time.time())
@@ -196,9 +198,12 @@ async def send_message(self, message: str) -> bool:
         """Sends a message to the client."""
         return await self.send({'message': message})
 
-    async def send_status_message(self, message: str) -> bool:
+    async def send_status_message(self, msg_type: str, id: str, message: str) -> bool:
         """Sends a status message to the client."""
-        return await self.send({'status': message})
+        print('SEND STATUS BACK TO CLIENT')
+        return await self.send(
+            {'status_update': True, 'type': msg_type, 'id': id, 'message': message}
+        )
 
     def update_connection(self, ws: WebSocket):
         self.websocket = ws
@@ -212,7 +217,10 @@ def load_from_data(self, data: dict) -> bool:
         self.is_alive = data.get('is_alive', False)
         return True
 
-    def queue_status_message(self, message: str):
+    def queue_status_message(self, msg_type: str, id: str, message: str):
         """Queues a status message to be sent asynchronously."""
         # Ensure the coroutine runs in the main event loop
-        asyncio.run_coroutine_threadsafe(self.send_status_message(message), self.loop)
+        print('QUEUE STATUS MESSAGE')
+        asyncio.run_coroutine_threadsafe(
+            self.send_status_message(msg_type, id, message), self.loop
+        )

From d9f834e8c2785910d7dcd829a208e77450c8580a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 14:47:16 -0400
Subject: [PATCH 20/60] logspam

---
 openhands/runtime/base.py           | 1 -
 openhands/server/session/session.py | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 1b59376924a..d3b236131d1 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -107,7 +107,6 @@ def log(self, level: str, message: str) -> None:
 
     def send_status_message(self, message_id: str):
         """Sends a status message if the callback function was provided."""
-        print('SEND STATUS', self.status_callback)
         if self.status_callback:
             msg = STATUS_MESSAGES.get(message_id, '')
             self.status_callback('status', message_id, msg)
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index b83f09b2493..2784cac5f0d 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -178,7 +178,6 @@ async def send(self, data: dict[str, object]) -> bool:
         try:
             if self.websocket is None or not self.is_alive:
                 return False
-            print('SEND JSON', data)
             await self.websocket.send_json(data)
             await asyncio.sleep(0.001)  # This flushes the data to the client
             self.last_active_ts = int(time.time())
@@ -200,7 +199,6 @@ async def send_message(self, message: str) -> bool:
 
     async def send_status_message(self, msg_type: str, id: str, message: str) -> bool:
         """Sends a status message to the client."""
-        print('SEND STATUS BACK TO CLIENT')
         return await self.send(
             {'status_update': True, 'type': msg_type, 'id': id, 'message': message}
         )
@@ -220,7 +218,6 @@ def load_from_data(self, data: dict) -> bool:
     def queue_status_message(self, msg_type: str, id: str, message: str):
         """Queues a status message to be sent asynchronously."""
         # Ensure the coroutine runs in the main event loop
-        print('QUEUE STATUS MESSAGE')
         asyncio.run_coroutine_threadsafe(
             self.send_status_message(msg_type, id, message), self.loop
         )

From 99486f4ae7ac35ca9b326dfa188998b015c9a36a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 14:52:26 -0400
Subject: [PATCH 21/60] fix await

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4e430c05ee9..568877a6ffa 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -154,7 +154,7 @@ async def _react_to_exception(
             err_id = ''
             if isinstance(e, litellm.AuthenticationError):
                 err_id = 'llm_authentication_error'
-            await self._status_callback('error', err_id, str(e))
+            self._status_callback('error', err_id, str(e))
 
     async def start_step_loop(self):
         """The main loop for the agent's step-by-step execution."""

From ed7f4ee34d073b0d3ba2abdd02695d9c77f378c7 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 15:25:30 -0400
Subject: [PATCH 22/60] better error toasts on frontend

---
 frontend/src/components/AgentStatusBar.tsx | 28 +++++++++++++++-------
 frontend/src/services/actions.ts           |  2 --
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index c337a838f32..be6eba4448a 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -5,6 +5,7 @@ import { I18nKey } from "#/i18n/declaration";
 import { RootState } from "#/store";
 import AgentState from "#/types/AgentState";
 import beep from "#/utils/beep";
+import toast from "react-hot-toast";
 
 enum IndicatorColor {
   BLUE = "bg-blue-500",
@@ -16,7 +17,7 @@ enum IndicatorColor {
 }
 
 function AgentStatusBar() {
-  const { t } = useTranslation();
+  const { t, i18n } = useTranslation();
   const { curAgentState } = useSelector((state: RootState) => state.agent);
   const { curStatusMessage } = useSelector((state: RootState) => state.status);
 
@@ -94,15 +95,26 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
-    if (curAgentState === AgentState.LOADING) {
-      const trimmedCustomMessage = curStatusMessage.status.trim();
-      if (trimmedCustomMessage) {
-        setStatusMessage(t(trimmedCustomMessage));
-        return;
+    let message = curStatusMessage.message || '';
+    if (curStatusMessage?.id) {
+      const id = curStatusMessage.id.trim();
+      console.log('status message id', id);
+      if (i18n.exists(id)) {
+        console.log('exists');
+        message = t(curStatusMessage.id.trim()) || message;
       }
     }
-    setStatusMessage(AgentStatusMap[curAgentState].message);
-  }, [curAgentState, curStatusMessage.status]);
+    if (curStatusMessage?.type === "error") {
+      console.log('error', message);
+      toast.error(message);
+      return;
+    }
+    if (curAgentState === AgentState.LOADING && message.trim()) {
+      setStatusMessage(message);
+    } else {
+      setStatusMessage(AgentStatusMap[curAgentState].message);
+    }
+  }, [curAgentState, curStatusMessage.id]);
 
   return (
     <div className="flex flex-col items-center">
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 9d215dcb146..620f4513113 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -119,11 +119,9 @@ export function handleActionMessage(message: ActionMessage) {
 }
 
 export function handleStatusMessage(message: StatusMessage) {
-  const msg_id = message.id;
   store.dispatch(
     setCurStatusMessage({
       ...message,
-      status: msg_id,
     }),
   );
 }

From 2e342b870e2dcbf531010bcd73e4014ef1bdbf5e Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 15:32:51 -0400
Subject: [PATCH 23/60] better error plumbing

---
 frontend/src/components/AgentStatusBar.tsx | 7 ++++++-
 openhands/controller/agent_controller.py   | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index be6eba4448a..351f4924bba 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -95,6 +95,7 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
+    console.log('cur status message', curStatusMessage);
     let message = curStatusMessage.message || '';
     if (curStatusMessage?.id) {
       const id = curStatusMessage.id.trim();
@@ -114,7 +115,11 @@ function AgentStatusBar() {
     } else {
       setStatusMessage(AgentStatusMap[curAgentState].message);
     }
-  }, [curAgentState, curStatusMessage.id]);
+  }, [curStatusMessage.id]);
+
+  React.useEffect(() => {
+      setStatusMessage(AgentStatusMap[curAgentState].message);
+  }, [curAgentState]);
 
   return (
     <div className="flex flex-col items-center">
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 568877a6ffa..7a02166deb0 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -322,9 +322,10 @@ async def set_agent_state_to(self, new_state: AgentState):
             )
 
         self.state.agent_state = new_state
-        self.event_stream.add_event(
+        await self.event_stream.async_add_event(
             AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT
         )
+        print('sent state change obs')
 
         if new_state == AgentState.INIT and self.state.resume_state:
             await self.set_agent_state_to(self.state.resume_state)

From 154b838b442fdcbcd839a80d0fb63205d024880f Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 15:33:04 -0400
Subject: [PATCH 24/60] logspam

---
 frontend/src/components/AgentStatusBar.tsx | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index 351f4924bba..8aeb567f096 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -95,18 +95,14 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
-    console.log('cur status message', curStatusMessage);
     let message = curStatusMessage.message || '';
     if (curStatusMessage?.id) {
       const id = curStatusMessage.id.trim();
-      console.log('status message id', id);
       if (i18n.exists(id)) {
-        console.log('exists');
         message = t(curStatusMessage.id.trim()) || message;
       }
     }
     if (curStatusMessage?.type === "error") {
-      console.log('error', message);
       toast.error(message);
       return;
     }

From 45ba7cf1139ded62425acea7ccafed3e8888f237 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 16:36:19 -0400
Subject: [PATCH 25/60] better restarting

---
 openhands/agenthub/codeact_agent/codeact_agent.py |  2 +-
 openhands/controller/agent_controller.py          |  2 +-
 openhands/server/session/agent_session.py         |  1 +
 openhands/server/session/session.py               | 12 ++++++------
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 997d424c515..9dca9baf235 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -110,7 +110,7 @@ def __init__(
                 codeact_enable_jupyter=self.config.codeact_enable_jupyter,
                 codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
             )
-            logger.info(
+            logger.debug(
                 f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
             )
             self.system_prompt = codeact_function_calling.SYSTEM_PROMPT
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 7a02166deb0..990cb6deb88 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -170,7 +170,6 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 self.log('error', f'Error while running the agent: {e}')
                 await self._react_to_exception(e)
-                break
 
             await asyncio.sleep(0.1)
 
@@ -256,6 +255,7 @@ async def _handle_message_action(self, action: MessageAction):
                 str(action),
                 extra={'msg_type': 'ACTION', 'event_source': EventSource.USER},
             )
+            print('GOT MESSAGE ACTION, RESTARTING')
             if self.get_agent_state() != AgentState.RUNNING:
                 await self.set_agent_state_to(AgentState.RUNNING)
         elif action.source == EventSource.AGENT and action.wait_for_response:
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index b7fd08ba823..6194f6d3a6c 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -128,6 +128,7 @@ async def _start(
 
     async def close(self):
         """Closes the Agent session"""
+        print('CLOSING AGENT SESSION')
 
         if self._closed:
             return
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 2784cac5f0d..678982651af 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -52,6 +52,7 @@ def __init__(
         self.loop = asyncio.get_event_loop()
 
     async def close(self):
+        print('CLOSE SESSION')
         self.is_alive = False
         await self.agent_session.close()
 
@@ -67,9 +68,11 @@ async def loop_recv(self):
                     continue
                 await self.dispatch(data)
         except WebSocketDisconnect:
+            print('WebSocketDisconnect, closing!')
             await self.close()
             logger.debug('WebSocket disconnected, sid: %s', self.sid)
         except RuntimeError as e:
+            print('RuntimeError, closing!', e)
             await self.close()
             logger.exception('Error in loop_recv: %s', e)
 
@@ -167,12 +170,9 @@ async def dispatch(self, data: dict):
                     )
                     return
         if self.agent_session.loop:
-            asyncio.run_coroutine_threadsafe(
-                self._add_event(event, EventSource.USER), self.agent_session.loop
-            )  # type: ignore
-
-    async def _add_event(self, event, event_source):
-        self.agent_session.event_stream.add_event(event, EventSource.USER)
+            await self.agent_session.event_stream.async_add_event(
+                event, EventSource.USER
+            )
 
     async def send(self, data: dict[str, object]) -> bool:
         try:

From 9c28805718d0c3cfded63764572abece9d5ad154 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Wed, 30 Oct 2024 04:55:20 +0100
Subject: [PATCH 26/60] Update openhands/controller/agent_controller.py

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4e430c05ee9..9b268536e71 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -1,7 +1,7 @@
 import asyncio
 import copy
 import traceback
-from typing import Callable, Optional, Type
+from typing import Callable, Type
 
 import litellm
 

From b757831adbf51143e7a2e66589dc963a970c7365 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Wed, 30 Oct 2024 04:55:52 +0100
Subject: [PATCH 27/60] Update openhands/controller/agent_controller.py

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 9b268536e71..a1e3d98f7b6 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -75,7 +75,7 @@ def __init__(
         initial_state: State | None = None,
         is_delegate: bool = False,
         headless_mode: bool = True,
-        status_callback: Optional[Callable] = None,
+        status_callback: Callable | None = None,
     ):
         """Initializes a new instance of the AgentController class.
 

From 3e446bb47d1e46d13e92a8cc158c9e1c8af0ea8b Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 16:50:30 -0400
Subject: [PATCH 28/60] delint

---
 frontend/src/components/AgentStatusBar.tsx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index 8aeb567f096..7de9ae0397e 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -1,11 +1,11 @@
 import React, { useEffect } from "react";
 import { useTranslation } from "react-i18next";
 import { useSelector } from "react-redux";
+import toast from "react-hot-toast";
 import { I18nKey } from "#/i18n/declaration";
 import { RootState } from "#/store";
 import AgentState from "#/types/AgentState";
 import beep from "#/utils/beep";
-import toast from "react-hot-toast";
 
 enum IndicatorColor {
   BLUE = "bg-blue-500",
@@ -95,7 +95,7 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
-    let message = curStatusMessage.message || '';
+    let message = curStatusMessage.message || "";
     if (curStatusMessage?.id) {
       const id = curStatusMessage.id.trim();
       if (i18n.exists(id)) {
@@ -114,7 +114,7 @@ function AgentStatusBar() {
   }, [curStatusMessage.id]);
 
   React.useEffect(() => {
-      setStatusMessage(AgentStatusMap[curAgentState].message);
+    setStatusMessage(AgentStatusMap[curAgentState].message);
   }, [curAgentState]);
 
   return (

From 9289cdbd61547b5c3f19e5a409ac5ab3f69a5b02 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 16:56:42 -0400
Subject: [PATCH 29/60] fix threading

---
 openhands/server/session/agent_session.py | 1 -
 openhands/server/session/session.py       | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index 6194f6d3a6c..b7fd08ba823 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -128,7 +128,6 @@ async def _start(
 
     async def close(self):
         """Closes the Agent session"""
-        print('CLOSING AGENT SESSION')
 
         if self._closed:
             return
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 678982651af..a67918805db 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -170,9 +170,10 @@ async def dispatch(self, data: dict):
                     )
                     return
         if self.agent_session.loop:
-            await self.agent_session.event_stream.async_add_event(
-                event, EventSource.USER
-            )
+            asyncio.run_coroutine_threadsafe(
+                self.agent_session.event_stream.add_event(event, EventSource.USER),
+                self.agent_session.loop,
+            )  # type: ignore
 
     async def send(self, data: dict[str, object]) -> bool:
         try:

From 52bbdd8b307e64ad383a7eed018440718f2f45a5 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:01:11 -0400
Subject: [PATCH 30/60] fix async

---
 openhands/server/session/session.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a67918805db..a2ad87d214f 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -171,7 +171,9 @@ async def dispatch(self, data: dict):
                     return
         if self.agent_session.loop:
             asyncio.run_coroutine_threadsafe(
-                self.agent_session.event_stream.add_event(event, EventSource.USER),
+                self.agent_session.event_stream.async_add_event(
+                    event, EventSource.USER
+                ),
                 self.agent_session.loop,
             )  # type: ignore
 

From e78af120808b7f512a492e8605a0098940338d55 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:05:59 -0400
Subject: [PATCH 31/60] add id for auth errors

---
 frontend/src/i18n/translation.json       | 6 ++++++
 openhands/controller/agent_controller.py | 2 +-
 openhands/server/session/session.py      | 3 ---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json
index 795c60e051f..3893d9f4f68 100644
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -1510,5 +1510,11 @@
     "ar": "في انتظار جاهزية العميل...",
     "fr": "En attente que le client soit prêt...",
     "tr": "İstemcinin hazır olması bekleniyor..."
+  },
+  "STATUS$ERROR_LLM_AUTHENTICATION": {
+    "en": "Error authenticating with the LLM provider. Please check your API key"
+  },
+  "STATUS$ERROR_RUNTIME_DISCONNECTED": {
+    "en": "There was an error while connecting to the runtime. Please refresh the page."
   }
 }
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8ffc278a753..4d572f8c390 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -153,7 +153,7 @@ async def _react_to_exception(
         if self._status_callback is not None:
             err_id = ''
             if isinstance(e, litellm.AuthenticationError):
-                err_id = 'llm_authentication_error'
+                err_id = 'STATUS$ERROR_LLM_AUTHENTICATION'
             self._status_callback('error', err_id, str(e))
 
     async def start_step_loop(self):
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a2ad87d214f..f5bdc20e0c6 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -52,7 +52,6 @@ def __init__(
         self.loop = asyncio.get_event_loop()
 
     async def close(self):
-        print('CLOSE SESSION')
         self.is_alive = False
         await self.agent_session.close()
 
@@ -68,11 +67,9 @@ async def loop_recv(self):
                     continue
                 await self.dispatch(data)
         except WebSocketDisconnect:
-            print('WebSocketDisconnect, closing!')
             await self.close()
             logger.debug('WebSocket disconnected, sid: %s', self.sid)
         except RuntimeError as e:
-            print('RuntimeError, closing!', e)
             await self.close()
             logger.exception('Error in loop_recv: %s', e)
 

From 7b7b306bf7e6ec71faa28e4f4ae474ee588cc32e Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:35:30 -0400
Subject: [PATCH 32/60] better error messages\, use messages instead of toast

---
 frontend/src/components/chat-interface.tsx |  2 +-
 frontend/src/components/error-message.tsx  | 25 +++++++++++++++++++---
 frontend/src/i18n/translation.json         |  6 ++++++
 frontend/src/services/actions.ts           | 20 +++++++++++------
 frontend/src/state/chatSlice.ts            |  6 +++---
 openhands/controller/agent_controller.py   |  1 -
 openhands/runtime/base.py                  |  2 +-
 7 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/frontend/src/components/chat-interface.tsx b/frontend/src/components/chat-interface.tsx
index 10786c13923..0eaaef0bb6c 100644
--- a/frontend/src/components/chat-interface.tsx
+++ b/frontend/src/components/chat-interface.tsx
@@ -106,7 +106,7 @@ export function ChatInterface() {
           isErrorMessage(message) ? (
             <ErrorMessage
               key={index}
-              error={message.error}
+              id={message.id}
               message={message.message}
             />
           ) : (
diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx
index cded8c3729f..7bc14c73b41 100644
--- a/frontend/src/components/error-message.tsx
+++ b/frontend/src/components/error-message.tsx
@@ -1,14 +1,33 @@
+import { useState, useEffect } from "react";
+import { useTranslation } from "react-i18next";
+
 interface ErrorMessageProps {
   error: string;
   message: string;
 }
 
-export function ErrorMessage({ error, message }: ErrorMessageProps) {
+export function ErrorMessage({ id, message }: ErrorMessageProps) {
+  const { t, i18n } = useTranslation();
+  const [showDetails, setShowDetails] = useState(true);
+  const [headline, setHeadline] = useState('');
+  const [details, setDetails] = useState(message);
+
+  useEffect(() => {
+    if (i18n.exists(id)) {
+      setHeadline(t(id));
+      setDetails(message);
+      setShowDetails(false);
+    }
+  }, [id, message, i18n.language]);
+
   return (
     <div className="flex gap-2 items-center justify-start border-l-2 border-danger pl-2 my-2 py-2">
       <div className="text-sm leading-4 flex flex-col gap-2">
-        <p className="text-danger font-bold">{error}</p>
-        <p className="text-neutral-300">{message}</p>
+        {(headline && <p className="text-danger font-bold">{headline}</p>)}
+        <a onClick={() => setShowDetails(!showDetails)} className="cursor-pointer">
+        {(showDetails ? t('ERROR_MESSAGE$HIDE_DETAILS') : t('ERROR_MESSAGE$SHOW_DETAILS'))}
+        </a>
+        {(showDetails && <p className="text-neutral-300">{details}</p>)}
       </div>
     </div>
   );
diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json
index 3893d9f4f68..ab73c4f86e2 100644
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -1441,6 +1441,12 @@
     "fr": "Privé",
     "tr": "Özel"
   },
+  "ERROR_MESSAGE$SHOW_DETAILS": {
+    "en": "Show details"
+  },
+  "ERROR_MESSAGE$HIDE_DETAILS": {
+    "en": "Hide details"
+  },
   "STATUS$STARTING_RUNTIME": {
     "en": "Starting Runtime...",
     "zh-CN": "启动运行时...",
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 620f4513113..36e6e473021 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -1,4 +1,4 @@
-import { addAssistantMessage, addUserMessage } from "#/state/chatSlice";
+import { addAssistantMessage, addUserMessage, addErrorMessage } from "#/state/chatSlice";
 import { setCode, setActiveFilepath } from "#/state/codeSlice";
 import { appendJupyterInput } from "#/state/jupyterSlice";
 import {
@@ -119,11 +119,19 @@ export function handleActionMessage(message: ActionMessage) {
 }
 
 export function handleStatusMessage(message: StatusMessage) {
-  store.dispatch(
-    setCurStatusMessage({
-      ...message,
-    }),
-  );
+  if (message.type === 'info') {
+    store.dispatch(
+      setCurStatusMessage({
+        ...message,
+      }),
+    );
+  } else if (message.type === 'error') {
+    store.dispatch(
+      addErrorMessage({
+        ...message,
+      }),
+    );
+  }
 }
 
 export function handleAssistantMessage(data: string | SocketMessage) {
diff --git a/frontend/src/state/chatSlice.ts b/frontend/src/state/chatSlice.ts
index 46f156ebddb..375e3ba260f 100644
--- a/frontend/src/state/chatSlice.ts
+++ b/frontend/src/state/chatSlice.ts
@@ -39,10 +39,10 @@ export const chatSlice = createSlice({
 
     addErrorMessage(
       state,
-      action: PayloadAction<{ error: string; message: string }>,
+      action: PayloadAction<{ id: string, message: string }>,
     ) {
-      const { error, message } = action.payload;
-      state.messages.push({ error, message });
+      const { id, message } = action.payload;
+      state.messages.push({ id, message, error: true });
     },
 
     clearMessages(state) {
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4d572f8c390..8d6a55e0b52 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -325,7 +325,6 @@ async def set_agent_state_to(self, new_state: AgentState):
         await self.event_stream.async_add_event(
             AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT
         )
-        print('sent state change obs')
 
         if new_state == AgentState.INIT and self.state.resume_state:
             await self.set_agent_state_to(self.state.resume_state)
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index d3b236131d1..d5076aadbc5 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -109,7 +109,7 @@ def send_status_message(self, message_id: str):
         """Sends a status message if the callback function was provided."""
         if self.status_callback:
             msg = STATUS_MESSAGES.get(message_id, '')
-            self.status_callback('status', message_id, msg)
+            self.status_callback('info', message_id, msg)
 
     # ====================================================================
 

From baf2c11ea5d873b08aa485aac51e6880d286b398 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:47:29 -0400
Subject: [PATCH 33/60] no more retrys

---
 openhands/runtime/builder/remote.py           | 11 ++---
 .../impl/eventstream/eventstream_runtime.py   | 18 +++----
 .../runtime/impl/remote/remote_runtime.py     | 40 ++++++----------
 openhands/runtime/utils/request.py            | 47 +++----------------
 4 files changed, 35 insertions(+), 81 deletions(-)

diff --git a/openhands/runtime/builder/remote.py b/openhands/runtime/builder/remote.py
index f96afb38eeb..f7000a8244e 100644
--- a/openhands/runtime/builder/remote.py
+++ b/openhands/runtime/builder/remote.py
@@ -7,7 +7,7 @@
 
 from openhands.core.logger import openhands_logger as logger
 from openhands.runtime.builder import RuntimeBuilder
-from openhands.runtime.utils.request import is_429_error, send_request_with_retry
+from openhands.runtime.utils.request import send_request
 from openhands.runtime.utils.shutdown_listener import (
     should_continue,
     sleep_if_should_continue,
@@ -45,13 +45,12 @@ def build(self, path: str, tags: list[str], platform: str | None = None) -> str:
             files.append(('tags', (None, tag)))
 
         # Send the POST request to /build (Begins the build process)
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'POST',
             f'{self.api_url}/build',
             files=files,
             timeout=30,
-            retry_fns=[is_429_error],
         )
 
         if response.status_code != 202:
@@ -70,12 +69,11 @@ def build(self, path: str, tags: list[str], platform: str | None = None) -> str:
                 logger.error('Build timed out after 30 minutes')
                 raise RuntimeError('Build timed out after 30 minutes')
 
-            status_response = send_request_with_retry(
+            status_response = send_request(
                 self.session,
                 'GET',
                 f'{self.api_url}/build_status',
                 params={'build_id': build_id},
-                timeout=30,
             )
 
             if status_response.status_code != 200:
@@ -112,12 +110,11 @@ def build(self, path: str, tags: list[str], platform: str | None = None) -> str:
     def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
         """Checks if an image exists in the remote registry using the /image_exists endpoint."""
         params = {'image': image_name}
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'GET',
             f'{self.api_url}/image_exists',
             params=params,
-            timeout=30,
         )
 
         if response.status_code != 200:
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 7ac78b183b9..3362e5162ea 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -36,7 +36,7 @@
 from openhands.runtime.builder import DockerRuntimeBuilder
 from openhands.runtime.plugins import PluginRequirement
 from openhands.runtime.utils import find_available_tcp_port
-from openhands.runtime.utils.request import send_request_with_retry
+from openhands.runtime.utils.request import send_request
 from openhands.runtime.utils.runtime_build import build_runtime_image
 from openhands.utils.tenacity_stop import stop_if_should_exit
 
@@ -304,7 +304,7 @@ def _init_container(self):
                 volumes = None
             self.log(
                 'debug',
-                f'Sandbox workspace: {self.config.workspace_mount_path_in_sandbox}'
+                f'Sandbox workspace: {self.config.workspace_mount_path_in_sandbox}',
             )
 
             if self.config.sandbox.browsergym_eval_env is not None:
@@ -392,12 +392,12 @@ def _wait_until_alive(self):
         if not self.log_buffer:
             raise RuntimeError('Runtime client is not ready.')
 
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'GET',
             f'{self.api_url}/alive',
             retry_exceptions=[ConnectionRefusedError],
-            timeout=300,  # 5 minutes gives the container time to be alive 🧟‍♂️
+            timeout=5,
         )
         if response.status_code == 200:
             return
@@ -486,7 +486,7 @@ def run_action(self, action: Action) -> Observation:
             assert action.timeout is not None
 
             try:
-                response = send_request_with_retry(
+                response = send_request(
                     self.session,
                     'POST',
                     f'{self.api_url}/execute_action',
@@ -569,7 +569,7 @@ def copy_to(
 
             params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()}
 
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'POST',
                 f'{self.api_url}/upload_file',
@@ -606,12 +606,12 @@ def list_files(self, path: str | None = None) -> list[str]:
             if path is not None:
                 data['path'] = path
 
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'POST',
                 f'{self.api_url}/list_files',
                 json=data,
-                timeout=30,  # 30 seconds because the container should already be alive
+                timeout=10,
             )
             if response.status_code == 200:
                 response_json = response.json()
@@ -630,7 +630,7 @@ def copy_from(self, path: str) -> bytes:
         self._refresh_logs()
         try:
             params = {'path': path}
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'GET',
                 f'{self.api_url}/download_files',
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index a628db49654..c9f9d230aa5 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -32,9 +32,7 @@
 from openhands.runtime.plugins import PluginRequirement
 from openhands.runtime.utils.command import get_remote_startup_command
 from openhands.runtime.utils.request import (
-    is_404_error,
-    is_503_error,
-    send_request_with_retry,
+    send_request,
 )
 from openhands.runtime.utils.runtime_build import build_runtime_image
 
@@ -127,7 +125,7 @@ def _start_or_attach_to_runtime(self):
 
     def _check_existing_runtime(self) -> bool:
         try:
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'GET',
                 f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.sid}',
@@ -160,11 +158,11 @@ def _check_existing_runtime(self) -> bool:
 
     def _build_runtime(self):
         self.log('debug', f'Building RemoteRuntime config:\n{self.config}')
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'GET',
             f'{self.config.sandbox.remote_runtime_api_url}/registry_prefix',
-            timeout=30,
+            timeout=10,
         )
         response_json = response.json()
         registry_prefix = response_json['registry_prefix']
@@ -191,12 +189,12 @@ def _build_runtime(self):
             force_rebuild=self.config.sandbox.force_rebuild_runtime,
         )
 
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'GET',
             f'{self.config.sandbox.remote_runtime_api_url}/image_exists',
             params={'image': self.container_image},
-            timeout=30,
+            timeout=10,
         )
         if response.status_code != 200 or not response.json()['exists']:
             raise RuntimeError(f'Container image {self.container_image} does not exist')
@@ -228,12 +226,11 @@ def _start_runtime(self):
         }
 
         # Start the sandbox using the /start endpoint
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'POST',
             f'{self.config.sandbox.remote_runtime_api_url}/start',
             json=start_request,
-            timeout=300,
         )
         if response.status_code != 201:
             raise RuntimeError(
@@ -246,7 +243,7 @@ def _start_runtime(self):
         )
 
     def _resume_runtime(self):
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'POST',
             f'{self.config.sandbox.remote_runtime_api_url}/resume',
@@ -275,11 +272,10 @@ def _wait_until_alive(self):
         max_not_found_count = 12  # 2 minutes
         not_found_count = 0
         while not pod_running:
-            runtime_info_response = send_request_with_retry(
+            runtime_info_response = send_request(
                 self.session,
                 'GET',
                 f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}',
-                timeout=5,
             )
             if runtime_info_response.status_code != 200:
                 raise RuntimeError(
@@ -310,16 +306,10 @@ def _wait_until_alive(self):
             # Pending otherwise - add proper sleep
             time.sleep(10)
 
-        response = send_request_with_retry(
+        response = send_request(
             self.session,
             'GET',
             f'{self.runtime_url}/alive',
-            # Retry 404 & 503 errors for the /alive endpoint
-            # because the runtime might just be starting up
-            # and have not registered the endpoint yet
-            retry_fns=[is_404_error, is_503_error],
-            # leave enough time for the runtime to start up
-            timeout=600,
         )
         if response.status_code != 200:
             msg = f'Runtime (ID={self.runtime_id}) is not alive yet. Status: {response.status_code}.'
@@ -332,7 +322,7 @@ def close(self, timeout: int = 10):
             return
         if self.runtime_id:
             try:
-                response = send_request_with_retry(
+                response = send_request(
                     self.session,
                     'POST',
                     f'{self.config.sandbox.remote_runtime_api_url}/stop',
@@ -374,7 +364,7 @@ def run_action(self, action: Action) -> Observation:
             try:
                 request_body = {'action': event_to_dict(action)}
                 self.log('debug', f'Request body: {request_body}')
-                response = send_request_with_retry(
+                response = send_request(
                     self.session,
                     'POST',
                     f'{self.runtime_url}/execute_action',
@@ -448,7 +438,7 @@ def copy_to(
 
             params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()}
 
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'POST',
                 f'{self.runtime_url}/upload_file',
@@ -488,7 +478,7 @@ def list_files(self, path: str | None = None) -> list[str]:
             if path is not None:
                 data['path'] = path
 
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'POST',
                 f'{self.runtime_url}/list_files',
@@ -517,7 +507,7 @@ def copy_from(self, path: str) -> bytes:
         """Zip all files in the sandbox and return as a stream of bytes."""
         try:
             params = {'path': path}
-            response = send_request_with_retry(
+            response = send_request(
                 self.session,
                 'GET',
                 f'{self.runtime_url}/download_files',
diff --git a/openhands/runtime/utils/request.py b/openhands/runtime/utils/request.py
index 6940827a3ad..3b654c7aca4 100644
--- a/openhands/runtime/utils/request.py
+++ b/openhands/runtime/utils/request.py
@@ -1,22 +1,12 @@
-from typing import Any, Callable, Type
+from typing import Any
 
 import requests
 from requests.exceptions import (
     ChunkedEncodingError,
     ConnectionError,
 )
-from tenacity import (
-    retry,
-    retry_if_exception,
-    retry_if_exception_type,
-    stop_after_delay,
-    wait_exponential,
-)
 from urllib3.exceptions import IncompleteRead
 
-from openhands.core.logger import openhands_logger as logger
-from openhands.utils.tenacity_stop import stop_if_should_exit
-
 
 def is_server_error(exception):
     return (
@@ -60,37 +50,14 @@ def is_502_error(exception):
 ]
 
 
-def send_request_with_retry(
+def send_request(
     session: requests.Session,
     method: str,
     url: str,
-    timeout: int,
-    retry_exceptions: list[Type[Exception]] | None = None,
-    retry_fns: list[Callable[[Exception], bool]] | None = None,
+    timeout: int = 10,
     **kwargs: Any,
 ) -> requests.Response:
-    exceptions_to_catch = retry_exceptions or DEFAULT_RETRY_EXCEPTIONS
-    retry_condition = retry_if_exception_type(
-        tuple(exceptions_to_catch)
-    ) | retry_if_exception(is_502_error)
-    if retry_fns is not None:
-        for fn in retry_fns:
-            retry_condition |= retry_if_exception(fn)
-    # wait a few more seconds to get the timeout error from client side
-    kwargs['timeout'] = timeout + 10
-
-    @retry(
-        stop=stop_after_delay(timeout) | stop_if_should_exit(),
-        wait=wait_exponential(multiplier=1, min=4, max=20),
-        retry=retry_condition,
-        reraise=True,
-        before_sleep=lambda retry_state: logger.debug(
-            f'Retrying {method} request to {url} due to {retry_state.outcome.exception()}. Attempt {retry_state.attempt_number}'
-        ),
-    )
-    def _send_request_with_retry():
-        response = session.request(method, url, **kwargs)
-        response.raise_for_status()
-        return response
-
-    return _send_request_with_retry()
+    kwargs['timeout'] = timeout
+    response = session.request(method, url, **kwargs)
+    response.raise_for_status()
+    return response

From 4be71e1c31491cba75357d8cd9ca3dc67d6b85b5 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:49:49 -0400
Subject: [PATCH 34/60] fix no headline

---
 frontend/src/components/error-message.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx
index 7bc14c73b41..439067ffc9a 100644
--- a/frontend/src/components/error-message.tsx
+++ b/frontend/src/components/error-message.tsx
@@ -25,7 +25,7 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) {
       <div className="text-sm leading-4 flex flex-col gap-2">
         {(headline && <p className="text-danger font-bold">{headline}</p>)}
         <a onClick={() => setShowDetails(!showDetails)} className="cursor-pointer">
-        {(showDetails ? t('ERROR_MESSAGE$HIDE_DETAILS') : t('ERROR_MESSAGE$SHOW_DETAILS'))}
+        {(headline && (showDetails ? t('ERROR_MESSAGE$HIDE_DETAILS') : t('ERROR_MESSAGE$SHOW_DETAILS')))}
         </a>
         {(showDetails && <p className="text-neutral-300">{details}</p>)}
       </div>

From 3e07a2dd187a13090d4e4d529bce852969c07115 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:54:39 -0400
Subject: [PATCH 35/60] fix call

---
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 3362e5162ea..318e7d07355 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -396,7 +396,6 @@ def _wait_until_alive(self):
             self.session,
             'GET',
             f'{self.api_url}/alive',
-            retry_exceptions=[ConnectionRefusedError],
             timeout=5,
         )
         if response.status_code == 200:

From c6fd5ac218cf64da4a88a2ac1f3de8b58c02f64c Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 19:21:20 -0400
Subject: [PATCH 36/60] runtime errors plumbing through properly

---
 openhands/runtime/base.py                     | 25 ++++++++++++++++---
 .../impl/eventstream/eventstream_runtime.py   | 13 ++++------
 openhands/server/listen.py                    | 10 +++++---
 openhands/server/session/agent_session.py     |  8 ++++++
 openhands/server/session/session.py           |  9 ++++---
 5 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index d5076aadbc5..c1471dfc438 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -5,6 +5,8 @@
 from abc import abstractmethod
 from typing import Callable
 
+from requests.exceptions import ConnectionError
+
 from openhands.core.config import AppConfig, SandboxConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventSource, EventStream, EventStreamSubscriber
@@ -111,6 +113,11 @@ def send_status_message(self, message_id: str):
             msg = STATUS_MESSAGES.get(message_id, '')
             self.status_callback('info', message_id, msg)
 
+    def send_error_message(self, message_id: str, message: str):
+        if self.status_callback:
+            print('RUNTIME STATUS CALLBACK', message_id, message)
+            self.status_callback('error', message_id, message)
+
     # ====================================================================
 
     def add_env_vars(self, env_vars: dict[str, str]) -> None:
@@ -145,9 +152,21 @@ async def on_event(self, event: Event) -> None:
             if event.timeout is None:
                 event.timeout = self.config.sandbox.timeout
             assert event.timeout is not None
-            observation: Observation = await call_sync_from_async(
-                self.run_action, event
-            )
+            try:
+                observation: Observation = await call_sync_from_async(
+                    self.run_action, event
+                )
+            except Exception as e:
+                err_id = ''
+                print('e class', e.__class__)
+                if isinstance(e, ConnectionError):
+                    err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
+                self.log('error', f'Unexpected error while running action {e}')
+                self.log('error', f'Problematic action: {str(event)}')
+                self.send_error_message(err_id, str(e))
+                self.close()
+                return
+
             observation._cause = event.id  # type: ignore[attr-defined]
             observation.tool_call_metadata = event.tool_call_metadata
             source = event.source if event.source else EventSource.AGENT
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 318e7d07355..df7b9110676 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -38,6 +38,7 @@
 from openhands.runtime.utils import find_available_tcp_port
 from openhands.runtime.utils.request import send_request
 from openhands.runtime.utils.runtime_build import build_runtime_image
+from openhands.utils.async_utils import call_sync_from_async
 from openhands.utils.tenacity_stop import stop_if_should_exit
 
 
@@ -205,21 +206,21 @@ async def connect(self):
             self.log(
                 'info', f'Starting runtime with image: {self.runtime_container_image}'
             )
-            self._init_container()
+            await call_sync_from_async(self._init_container)
             self.log('info', f'Container started: {self.container_name}')
 
         else:
-            self._attach_to_container()
+            await call_sync_from_async(self._attach_to_container)
 
         if not self.attach_to_existing:
             self.log('info', f'Waiting for client to become ready at {self.api_url}...')
         self.send_status_message('STATUS$WAITING_FOR_CLIENT')
-        self._wait_until_alive()
+        await call_sync_from_async(self._wait_until_alive)
         if not self.attach_to_existing:
             self.log('info', 'Runtime is ready.')
 
         if not self.attach_to_existing:
-            self.setup_initial_env()
+            await call_sync_from_async(self.setup_initial_env)
 
         self.log(
             'debug',
@@ -384,7 +385,6 @@ def _refresh_logs(self):
 
     @tenacity.retry(
         stop=tenacity.stop_after_delay(120) | stop_if_should_exit(),
-        wait=tenacity.wait_exponential(multiplier=2, min=1, max=20),
         reraise=(ConnectionRefusedError,),
     )
     def _wait_until_alive(self):
@@ -509,9 +509,6 @@ def run_action(self, action: Action) -> Observation:
                 obs = ErrorObservation(
                     f'Action execution timed out after {action.timeout} seconds.',
                 )
-            except Exception as e:
-                self.log('error', f'Error during action execution: {e}')
-                obs = ErrorObservation(f'Action execution failed: {str(e)}')
             self._refresh_logs()
             return obs
 
diff --git a/openhands/server/listen.py b/openhands/server/listen.py
index fc740e80293..b5752dcee1e 100644
--- a/openhands/server/listen.py
+++ b/openhands/server/listen.py
@@ -236,9 +236,11 @@ async def attach_session(request: Request, call_next):
             content={'error': 'Invalid token'},
         )
 
+    print('CONNECTING TO CONVO')
     request.state.conversation = await session_manager.attach_to_conversation(
         request.state.sid
     )
+    print('CONNECTED TO CONVO')
     if request.state.conversation is None:
         return JSONResponse(
             status_code=status.HTTP_404_NOT_FOUND,
@@ -477,11 +479,13 @@ async def list_files(request: Request, path: str | None = None):
 
     file_list = [f for f in file_list if f not in FILES_TO_IGNORE]
 
-    def filter_for_gitignore(file_list, base_path):
+    async def filter_for_gitignore(file_list, base_path):
         gitignore_path = os.path.join(base_path, '.gitignore')
         try:
             read_action = FileReadAction(gitignore_path)
-            observation = runtime.run_action(read_action)
+            observation = await asyncio.create_task(
+                call_sync_from_async(runtime.run_action, read_action)
+            )
             spec = PathSpec.from_lines(
                 GitWildMatchPattern, observation.content.splitlines()
             )
@@ -491,7 +495,7 @@ def filter_for_gitignore(file_list, base_path):
         file_list = [entry for entry in file_list if not spec.match_file(entry)]
         return file_list
 
-    file_list = filter_for_gitignore(file_list, '')
+    file_list = await filter_for_gitignore(file_list, '')
 
     return file_list
 
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index b7fd08ba823..fc5925cc2c2 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -145,6 +145,10 @@ async def close(self):
 
         self._closed = True
 
+    async def stop_agent_loop_for_error(self):
+        if self.controller is not None:
+            await self.controller.set_agent_state_to(AgentState.ERROR)
+
     def _create_security_analyzer(self, security_analyzer: str | None):
         """Creates a SecurityAnalyzer instance that will be used to analyze the agent actions
 
@@ -189,6 +193,10 @@ async def _create_runtime(
             await self.runtime.connect()
         except Exception as e:
             logger.error(f'Runtime initialization failed: {e}', exc_info=True)
+            if self._status_callback:
+                self._status_callback(
+                    'error', 'STATUS$ERROR_RUNTIME_DISCONNECTED', str(e)
+                )
             raise
 
         if self.runtime is not None:
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index f5bdc20e0c6..158ea05e593 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -197,8 +197,11 @@ async def send_message(self, message: str) -> bool:
         """Sends a message to the client."""
         return await self.send({'message': message})
 
-    async def send_status_message(self, msg_type: str, id: str, message: str) -> bool:
+    async def _send_status_message(self, msg_type: str, id: str, message: str) -> bool:
         """Sends a status message to the client."""
+        if msg_type == 'error':
+            await self.agent_session.stop_agent_loop_for_error()
+
         return await self.send(
             {'status_update': True, 'type': msg_type, 'id': id, 'message': message}
         )
@@ -217,7 +220,7 @@ def load_from_data(self, data: dict) -> bool:
 
     def queue_status_message(self, msg_type: str, id: str, message: str):
         """Queues a status message to be sent asynchronously."""
-        # Ensure the coroutine runs in the main event loop
+        print('QUEUE STATUS MESSAGE', msg_type, id, message)
         asyncio.run_coroutine_threadsafe(
-            self.send_status_message(msg_type, id, message), self.loop
+            self._send_status_message(msg_type, id, message), self.loop
         )

From 0012ed4616f4cc77af4638a645968bd6f6c0eb2c Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 19:22:01 -0400
Subject: [PATCH 37/60] logspam

---
 openhands/runtime/base.py           | 2 --
 openhands/server/listen.py          | 2 --
 openhands/server/session/session.py | 1 -
 3 files changed, 5 deletions(-)

diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index c1471dfc438..48a2905fe9a 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -115,7 +115,6 @@ def send_status_message(self, message_id: str):
 
     def send_error_message(self, message_id: str, message: str):
         if self.status_callback:
-            print('RUNTIME STATUS CALLBACK', message_id, message)
             self.status_callback('error', message_id, message)
 
     # ====================================================================
@@ -158,7 +157,6 @@ async def on_event(self, event: Event) -> None:
                 )
             except Exception as e:
                 err_id = ''
-                print('e class', e.__class__)
                 if isinstance(e, ConnectionError):
                     err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
                 self.log('error', f'Unexpected error while running action {e}')
diff --git a/openhands/server/listen.py b/openhands/server/listen.py
index b5752dcee1e..dceda9cb952 100644
--- a/openhands/server/listen.py
+++ b/openhands/server/listen.py
@@ -236,11 +236,9 @@ async def attach_session(request: Request, call_next):
             content={'error': 'Invalid token'},
         )
 
-    print('CONNECTING TO CONVO')
     request.state.conversation = await session_manager.attach_to_conversation(
         request.state.sid
     )
-    print('CONNECTED TO CONVO')
     if request.state.conversation is None:
         return JSONResponse(
             status_code=status.HTTP_404_NOT_FOUND,
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 158ea05e593..f4274eae2f8 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -220,7 +220,6 @@ def load_from_data(self, data: dict) -> bool:
 
     def queue_status_message(self, msg_type: str, id: str, message: str):
         """Queues a status message to be sent asynchronously."""
-        print('QUEUE STATUS MESSAGE', msg_type, id, message)
         asyncio.run_coroutine_threadsafe(
             self._send_status_message(msg_type, id, message), self.loop
         )

From a81975f60753875e314f339461f949b0d81301fb Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 19:23:36 -0400
Subject: [PATCH 38/60] remove another retry

---
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index df7b9110676..bef8243303d 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -239,10 +239,6 @@ def _init_docker_client() -> docker.DockerClient:
             )
             raise ex
 
-    @tenacity.retry(
-        stop=tenacity.stop_after_attempt(5) | stop_if_should_exit(),
-        wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
-    )
     def _init_container(self):
         try:
             self.log('debug', 'Preparing to start container...')

From 779889698e850f23cf49958d8efdffc57eb5eddf Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 20:28:42 -0400
Subject: [PATCH 39/60] refactor remote runtime error handling

---
 openhands/runtime/base.py                     |  12 +-
 .../runtime/impl/remote/remote_runtime.py     | 229 +++++++-----------
 2 files changed, 103 insertions(+), 138 deletions(-)

diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 48a2905fe9a..f10066d44e5 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -42,6 +42,14 @@
 }
 
 
+class RuntimeNotReadyError(Exception):
+    pass
+
+
+class RuntimeDisconnectedError(Exception):
+    pass
+
+
 def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
     ret = {}
     for key in os.environ:
@@ -157,7 +165,9 @@ async def on_event(self, event: Event) -> None:
                 )
             except Exception as e:
                 err_id = ''
-                if isinstance(e, ConnectionError):
+                if isinstance(e, ConnectionError) or isinstance(
+                    e, RuntimeDisconnectedError
+                ):
                     err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
                 self.log('error', f'Unexpected error while running action {e}')
                 self.log('error', f'Problematic action: {str(event)}')
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index c9f9d230aa5..eac157e0515 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -1,12 +1,11 @@
 import os
 import tempfile
 import threading
-import time
 from typing import Callable, Optional
 from zipfile import ZipFile
 
 import requests
-from requests.exceptions import Timeout
+import tenacity
 
 from openhands.core.config import AppConfig
 from openhands.events import EventStream
@@ -27,7 +26,11 @@
 )
 from openhands.events.serialization import event_to_dict, observation_from_dict
 from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
-from openhands.runtime.base import Runtime
+from openhands.runtime.base import (
+    Runtime,
+    RuntimeDisconnectedError,
+    RuntimeNotReadyError,
+)
 from openhands.runtime.builder.remote import RemoteRuntimeBuilder
 from openhands.runtime.plugins import PluginRequirement
 from openhands.runtime.utils.command import get_remote_startup_command
@@ -35,6 +38,8 @@
     send_request,
 )
 from openhands.runtime.utils.runtime_build import build_runtime_image
+from openhands.utils.async_utils import call_sync_from_async
+from openhands.utils.tenacity_stop import stop_if_should_exit
 
 
 class RemoteRuntime(Runtime):
@@ -73,7 +78,7 @@ def __init__(
 
         if self.config.workspace_base is not None:
             self.log(
-                'warning',
+                'debug',
                 'Setting workspace_base is not supported in the remote runtime.',
             )
 
@@ -84,9 +89,9 @@ def __init__(
         self.runtime_url: str | None = None
 
     async def connect(self):
-        self._start_or_attach_to_runtime()
-        self._wait_until_alive()
-        self.setup_initial_env()
+        await call_sync_from_async(self._start_or_attach_to_runtime)
+        await call_sync_from_async(self._wait_until_alive)
+        await call_sync_from_async(self.setup_initial_env)
 
     def _start_or_attach_to_runtime(self):
         existing_runtime = self._check_existing_runtime()
@@ -265,56 +270,50 @@ def _parse_runtime_response(self, response: requests.Response):
                 {'X-Session-API-Key': start_response['session_api_key']}
             )
 
+    @tenacity.retry(
+        stop=tenacity.stop_after_delay(120) | stop_if_should_exit(),
+        reraise=True,
+        retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
+    )
     def _wait_until_alive(self):
         self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
-        # send GET request to /runtime/<id>
-        pod_running = False
-        max_not_found_count = 12  # 2 minutes
-        not_found_count = 0
-        while not pod_running:
-            runtime_info_response = send_request(
+        runtime_info_response = send_request(
+            self.session,
+            'GET',
+            f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}',
+        )
+        if runtime_info_response.status_code != 200:
+            raise RuntimeError(
+                f'Failed to get runtime status: {runtime_info_response.status_code}. Response: {runtime_info_response.text}'
+            )
+        runtime_data = runtime_info_response.json()
+        assert 'runtime_id' in runtime_data
+        assert 'pod_status' in runtime_data
+        self.runtime_id = runtime_data['runtime_id']
+        pod_status = runtime_data['pod_status']
+        if pod_status == 'Ready':
+            response = send_request(
                 self.session,
                 'GET',
-                f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}',
+                f'{self.runtime_url}/alive',
             )
-            if runtime_info_response.status_code != 200:
-                raise RuntimeError(
-                    f'Failed to get runtime status: {runtime_info_response.status_code}. Response: {runtime_info_response.text}'
-                )
-            runtime_data = runtime_info_response.json()
-            assert runtime_data['runtime_id'] == self.runtime_id
-            pod_status = runtime_data['pod_status']
-            self.log(
-                'debug',
-                f'Waiting for runtime pod to be active. Current status: {pod_status}',
+            if response.status_code != 200:
+                msg = f'Runtime (ID={self.runtime_id}) is not alive yet, even though it reported Ready. Status: {response.status_code}.'
+                self.log('warning', msg)
+                raise RuntimeError(msg)
+            return
+        if pod_status in ('Failed', 'Unknown', 'Not Found'):
+            # clean up the runtime
+            self.close()
+            raise RuntimeError(
+                f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}'
             )
-            if pod_status == 'Ready':
-                pod_running = True
-                break
-            elif pod_status == 'Not Found' and not_found_count < max_not_found_count:
-                not_found_count += 1
-                self.log(
-                    'debug',
-                    f'Runtime pod not found. Count: {not_found_count} / {max_not_found_count}',
-                )
-            elif pod_status in ('Failed', 'Unknown', 'Not Found'):
-                # clean up the runtime
-                self.close()
-                raise RuntimeError(
-                    f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}'
-                )
-            # Pending otherwise - add proper sleep
-            time.sleep(10)
 
-        response = send_request(
-            self.session,
-            'GET',
-            f'{self.runtime_url}/alive',
+        self.log(
+            'debug',
+            f'Waiting for runtime pod to be active. Current status: {pod_status}',
         )
-        if response.status_code != 200:
-            msg = f'Runtime (ID={self.runtime_id}) is not alive yet. Status: {response.status_code}.'
-            self.log('warning', msg)
-            raise RuntimeError(msg)
+        raise RuntimeNotReadyError()
 
     def close(self, timeout: int = 10):
         if self.config.sandbox.keep_remote_runtime_alive or self.attach_to_existing:
@@ -371,27 +370,31 @@ def run_action(self, action: Action) -> Observation:
                     json=request_body,
                     timeout=action.timeout,
                 )
-                if response.status_code == 200:
-                    output = response.json()
-                    obs = observation_from_dict(output)
-                    obs._cause = action.id  # type: ignore[attr-defined]
-                    return obs
-                else:
-                    error_message = response.text
-                    self.log('error', f'Error from server: {error_message}')
-                    obs = ErrorObservation(f'Action execution failed: {error_message}')
-            except Timeout:
-                self.log('error', 'No response received within the timeout period.')
+                output = response.json()
+                obs = observation_from_dict(output)
+                obs._cause = action.id  # type: ignore[attr-defined]
+            except requests.Timeout:
                 obs = ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action execution timed out',
                 )
-            except Exception as e:
-                self.log('error', f'Error during action execution: {e}')
-                obs = ErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}',
-                )
+                return obs
             return obs
 
+    def _send_request(self, *args, **kwargs):
+        try:
+            return send_request(self.session, *args, **kwargs)
+        except requests.Timeout:
+            self.log('error', 'No response received within the timeout period.')
+            raise
+        except requests.HTTPError as e:
+            print('got http error', e)
+            if e.response.status_code == 404:
+                raise RuntimeDisconnectedError()
+            else:
+                raise e
+        except Exception as e:
+            print('Got unknown exception', e.__class__)
+
     def run(self, action: CmdRunAction) -> Observation:
         return self.run_action(action)
 
@@ -438,32 +441,16 @@ def copy_to(
 
             params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()}
 
-            response = send_request(
-                self.session,
+            response = self._send_request(
                 'POST',
                 f'{self.runtime_url}/upload_file',
                 files=upload_data,
                 params=params,
                 timeout=300,
             )
-            if response.status_code == 200:
-                self.log(
-                    'debug',
-                    f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}',
-                )
-                return
-            else:
-                error_message = response.text
-                raise Exception(
-                    f'[Runtime (ID={self.runtime_id})] Copy operation failed: {error_message}'
-                )
-        except TimeoutError:
-            raise TimeoutError(
-                f'[Runtime (ID={self.runtime_id})] Copy operation timed out'
-            )
-        except Exception as e:
-            raise RuntimeError(
-                f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}'
+            self.log(
+                'debug',
+                f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}',
             )
         finally:
             if recursive:
@@ -473,59 +460,27 @@ def copy_to(
             )
 
     def list_files(self, path: str | None = None) -> list[str]:
-        try:
-            data = {}
-            if path is not None:
-                data['path'] = path
+        data = {}
+        if path is not None:
+            data['path'] = path
 
-            response = send_request(
-                self.session,
-                'POST',
-                f'{self.runtime_url}/list_files',
-                json=data,
-                timeout=30,
-            )
-            if response.status_code == 200:
-                response_json = response.json()
-                assert isinstance(response_json, list)
-                return response_json
-            else:
-                error_message = response.text
-                raise Exception(
-                    f'[Runtime (ID={self.runtime_id})] List files operation failed: {error_message}'
-                )
-        except TimeoutError:
-            raise TimeoutError(
-                f'[Runtime (ID={self.runtime_id})] List files operation timed out'
-            )
-        except Exception as e:
-            raise RuntimeError(
-                f'[Runtime (ID={self.runtime_id})] List files operation failed: {str(e)}'
-            )
+        response = self._send_request(
+            'POST',
+            f'{self.runtime_url}/list_files',
+            json=data,
+            timeout=30,
+        )
+        response_json = response.json()
+        assert isinstance(response_json, list)
+        return response_json
 
     def copy_from(self, path: str) -> bytes:
         """Zip all files in the sandbox and return as a stream of bytes."""
-        try:
-            params = {'path': path}
-            response = send_request(
-                self.session,
-                'GET',
-                f'{self.runtime_url}/download_files',
-                params=params,
-                timeout=30,
-            )
-            if response.status_code == 200:
-                return response.content
-            else:
-                error_message = response.text
-                raise Exception(
-                    f'[Runtime (ID={self.runtime_id})] Copy operation failed: {error_message}'
-                )
-        except requests.Timeout:
-            raise TimeoutError(
-                f'[Runtime (ID={self.runtime_id})] Copy operation timed out'
-            )
-        except Exception as e:
-            raise RuntimeError(
-                f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}'
-            )
+        params = {'path': path}
+        response = self._send_request(
+            'GET',
+            f'{self.runtime_url}/download_files',
+            params=params,
+            timeout=30,
+        )
+        return response.content

From c5369d8142909badb1ff9f5762993277d1e6d9c0 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 20:47:54 -0400
Subject: [PATCH 40/60] more refactoring

---
 .../runtime/impl/remote/remote_runtime.py     | 96 +++++++------------
 1 file changed, 35 insertions(+), 61 deletions(-)

diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index eac157e0515..a29ce5b29c8 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -130,41 +130,37 @@ def _start_or_attach_to_runtime(self):
 
     def _check_existing_runtime(self) -> bool:
         try:
-            response = send_request(
-                self.session,
+            response = self._send_request(
                 'GET',
                 f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.sid}',
                 timeout=5,
             )
-        except Exception as e:
+        except requests.HTTPError as e:
+            if e.response.status_code == 404:
+                return False
             self.log('debug', f'Error while looking for remote runtime: {e}')
-            return False
+            raise
 
-        if response.status_code == 200:
-            data = response.json()
-            status = data.get('status')
-            if status == 'running':
-                self._parse_runtime_response(response)
-                return True
-            elif status == 'stopped':
-                self.log('debug', 'Found existing remote runtime, but it is stopped')
-                return False
-            elif status == 'paused':
-                self.log('debug', 'Found existing remote runtime, but it is paused')
-                self._parse_runtime_response(response)
-                self._resume_runtime()
-                return True
-            else:
-                self.log('error', f'Invalid response from runtime API: {data}')
-                return False
+        data = response.json()
+        status = data.get('status')
+        if status == 'running':
+            self._parse_runtime_response(response)
+            return True
+        elif status == 'stopped':
+            self.log('debug', 'Found existing remote runtime, but it is stopped')
+            return False
+        elif status == 'paused':
+            self.log('debug', 'Found existing remote runtime, but it is paused')
+            self._parse_runtime_response(response)
+            self._resume_runtime()
+            return True
         else:
-            self.log('debug', 'Could not find existing remote runtime')
+            self.log('error', f'Invalid response from runtime API: {data}')
             return False
 
     def _build_runtime(self):
         self.log('debug', f'Building RemoteRuntime config:\n{self.config}')
-        response = send_request(
-            self.session,
+        response = self._send_request(
             'GET',
             f'{self.config.sandbox.remote_runtime_api_url}/registry_prefix',
             timeout=10,
@@ -194,14 +190,13 @@ def _build_runtime(self):
             force_rebuild=self.config.sandbox.force_rebuild_runtime,
         )
 
-        response = send_request(
-            self.session,
+        response = self._send_request(
             'GET',
             f'{self.config.sandbox.remote_runtime_api_url}/image_exists',
             params={'image': self.container_image},
             timeout=10,
         )
-        if response.status_code != 200 or not response.json()['exists']:
+        if not response.json()['exists']:
             raise RuntimeError(f'Container image {self.container_image} does not exist')
 
     def _start_runtime(self):
@@ -231,16 +226,11 @@ def _start_runtime(self):
         }
 
         # Start the sandbox using the /start endpoint
-        response = send_request(
-            self.session,
+        response = self._send_request(
             'POST',
             f'{self.config.sandbox.remote_runtime_api_url}/start',
             json=start_request,
         )
-        if response.status_code != 201:
-            raise RuntimeError(
-                f'[Runtime (ID={self.runtime_id})] Failed to start runtime: {response.text}'
-            )
         self._parse_runtime_response(response)
         self.log(
             'debug',
@@ -248,17 +238,12 @@ def _start_runtime(self):
         )
 
     def _resume_runtime(self):
-        response = send_request(
-            self.session,
+        self._send_request(
             'POST',
             f'{self.config.sandbox.remote_runtime_api_url}/resume',
             json={'runtime_id': self.runtime_id},
             timeout=30,
         )
-        if response.status_code != 200:
-            raise RuntimeError(
-                f'[Runtime (ID={self.runtime_id})] Failed to resume runtime: {response.text}'
-            )
         self.log('debug', 'Runtime resumed.')
 
     def _parse_runtime_response(self, response: requests.Response):
@@ -277,30 +262,20 @@ def _parse_runtime_response(self, response: requests.Response):
     )
     def _wait_until_alive(self):
         self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
-        runtime_info_response = send_request(
-            self.session,
+        runtime_info_response = self._send_request(
             'GET',
             f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}',
         )
-        if runtime_info_response.status_code != 200:
-            raise RuntimeError(
-                f'Failed to get runtime status: {runtime_info_response.status_code}. Response: {runtime_info_response.text}'
-            )
         runtime_data = runtime_info_response.json()
         assert 'runtime_id' in runtime_data
+        assert runtime_data['runtime_id'] == self.runtime_id
         assert 'pod_status' in runtime_data
-        self.runtime_id = runtime_data['runtime_id']
         pod_status = runtime_data['pod_status']
         if pod_status == 'Ready':
-            response = send_request(
-                self.session,
+            self._send_request(
                 'GET',
                 f'{self.runtime_url}/alive',
-            )
-            if response.status_code != 200:
-                msg = f'Runtime (ID={self.runtime_id}) is not alive yet, even though it reported Ready. Status: {response.status_code}.'
-                self.log('warning', msg)
-                raise RuntimeError(msg)
+            )  # will raise exception if we don't get 200 back
             return
         if pod_status in ('Failed', 'Unknown', 'Not Found'):
             # clean up the runtime
@@ -319,10 +294,9 @@ def close(self, timeout: int = 10):
         if self.config.sandbox.keep_remote_runtime_alive or self.attach_to_existing:
             self.session.close()
             return
-        if self.runtime_id:
+        if self.runtime_id and self.session:
             try:
-                response = send_request(
-                    self.session,
+                response = self._send_request(
                     'POST',
                     f'{self.config.sandbox.remote_runtime_api_url}/stop',
                     json={'runtime_id': self.runtime_id},
@@ -363,8 +337,7 @@ def run_action(self, action: Action) -> Observation:
             try:
                 request_body = {'action': event_to_dict(action)}
                 self.log('debug', f'Request body: {request_body}')
-                response = send_request(
-                    self.session,
+                response = self._send_request(
                     'POST',
                     f'{self.runtime_url}/execute_action',
                     json=request_body,
@@ -380,15 +353,16 @@ def run_action(self, action: Action) -> Observation:
                 return obs
             return obs
 
-    def _send_request(self, *args, **kwargs):
+    def _send_request(self, method, url, **kwargs):
+        is_runtime_request = self.runtime_url in url
         try:
-            return send_request(self.session, *args, **kwargs)
+            return send_request(self.session, method, url, **kwargs)
         except requests.Timeout:
             self.log('error', 'No response received within the timeout period.')
             raise
         except requests.HTTPError as e:
             print('got http error', e)
-            if e.response.status_code == 404:
+            if is_runtime_request and e.response.status_code == 404:
                 raise RuntimeDisconnectedError()
             else:
                 raise e

From b72239f4967b604ad434dd174b0724a7d8f052c0 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 20:56:51 -0400
Subject: [PATCH 41/60] minor fixes

---
 openhands/runtime/impl/remote/remote_runtime.py | 9 ++++-----
 openhands/server/listen.py                      | 8 ++------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index a29ce5b29c8..480f4978e86 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -354,20 +354,19 @@ def run_action(self, action: Action) -> Observation:
             return obs
 
     def _send_request(self, method, url, **kwargs):
-        is_runtime_request = self.runtime_url in url
+        is_runtime_request = self.runtime_url and self.runtime_url in url
         try:
             return send_request(self.session, method, url, **kwargs)
         except requests.Timeout:
             self.log('error', 'No response received within the timeout period.')
             raise
         except requests.HTTPError as e:
-            print('got http error', e)
             if is_runtime_request and e.response.status_code == 404:
-                raise RuntimeDisconnectedError()
+                raise RuntimeDisconnectedError(
+                    f'404 error while connecting to {self.runtime_url}'
+                )
             else:
                 raise e
-        except Exception as e:
-            print('Got unknown exception', e.__class__)
 
     def run(self, action: CmdRunAction) -> Observation:
         return self.run_action(action)
diff --git a/openhands/server/listen.py b/openhands/server/listen.py
index dceda9cb952..fa2e0c55e10 100644
--- a/openhands/server/listen.py
+++ b/openhands/server/listen.py
@@ -469,9 +469,7 @@ async def list_files(request: Request, path: str | None = None):
         )
 
     runtime: Runtime = request.state.conversation.runtime
-    file_list = await asyncio.create_task(
-        call_sync_from_async(runtime.list_files, path)
-    )
+    file_list = await call_sync_from_async(runtime.list_files, path)
     if path:
         file_list = [os.path.join(path, f) for f in file_list]
 
@@ -481,9 +479,7 @@ async def filter_for_gitignore(file_list, base_path):
         gitignore_path = os.path.join(base_path, '.gitignore')
         try:
             read_action = FileReadAction(gitignore_path)
-            observation = await asyncio.create_task(
-                call_sync_from_async(runtime.run_action, read_action)
-            )
+            observation = await call_sync_from_async(runtime.run_action, read_action)
             spec = PathSpec.from_lines(
                 GitWildMatchPattern, observation.content.splitlines()
             )

From b8f221fd35a654ae0096b048977b47a4e74dabb2 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 21:07:47 -0400
Subject: [PATCH 42/60] fix session init

---
 openhands/runtime/impl/remote/remote_runtime.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 480f4978e86..b47f48fb991 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -57,6 +57,10 @@ def __init__(
         status_callback: Optional[Callable] = None,
         attach_to_existing: bool = False,
     ):
+        # We need to set session and action_semaphore before the __init__ below, or we get odd errors
+        self.session = requests.Session()
+        self.action_semaphore = threading.Semaphore(1)
+
         super().__init__(
             config,
             event_stream,
@@ -66,15 +70,12 @@ def __init__(
             status_callback,
             attach_to_existing,
         )
-
         if self.config.sandbox.api_key is None:
             raise ValueError(
                 'API key is required to use the remote runtime. '
                 'Please set the API key in the config (config.toml) or as an environment variable (SANDBOX_API_KEY).'
             )
-        self.session = requests.Session()
         self.session.headers.update({'X-API-Key': self.config.sandbox.api_key})
-        self.action_semaphore = threading.Semaphore(1)
 
         if self.config.workspace_base is not None:
             self.log(

From 29930514827fdd55bc55496d36fd991c03ea4426 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 21:10:19 -0400
Subject: [PATCH 43/60] add fixme

---
 openhands/controller/agent_controller.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8d6a55e0b52..35a5c4a5bbd 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -544,6 +544,7 @@ async def _handle_traffic_control(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                 )
+                # FIXME: this isn't really an exception--we should have a different path
                 await self._react_to_exception(
                     e,
                     new_state=AgentState.PAUSED,

From 43eb0d3aece5be15dcced7d8c9cfbb1668076e6e Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 11:54:23 -0400
Subject: [PATCH 44/60] fix lint

---
 frontend/src/components/error-message.tsx | 21 ++++++++++++++-------
 frontend/src/services/actions.ts          | 10 +++++++---
 frontend/src/state/chatSlice.ts           |  2 +-
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx
index 439067ffc9a..88b02aae15c 100644
--- a/frontend/src/components/error-message.tsx
+++ b/frontend/src/components/error-message.tsx
@@ -2,14 +2,14 @@ import { useState, useEffect } from "react";
 import { useTranslation } from "react-i18next";
 
 interface ErrorMessageProps {
-  error: string;
+  id: string;
   message: string;
 }
 
 export function ErrorMessage({ id, message }: ErrorMessageProps) {
   const { t, i18n } = useTranslation();
   const [showDetails, setShowDetails] = useState(true);
-  const [headline, setHeadline] = useState('');
+  const [headline, setHeadline] = useState("");
   const [details, setDetails] = useState(message);
 
   useEffect(() => {
@@ -23,11 +23,18 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) {
   return (
     <div className="flex gap-2 items-center justify-start border-l-2 border-danger pl-2 my-2 py-2">
       <div className="text-sm leading-4 flex flex-col gap-2">
-        {(headline && <p className="text-danger font-bold">{headline}</p>)}
-        <a onClick={() => setShowDetails(!showDetails)} className="cursor-pointer">
-        {(headline && (showDetails ? t('ERROR_MESSAGE$HIDE_DETAILS') : t('ERROR_MESSAGE$SHOW_DETAILS')))}
-        </a>
-        {(showDetails && <p className="text-neutral-300">{details}</p>)}
+        {headline && <p className="text-danger font-bold">{headline}</p>}
+        <button
+          type="button"
+          onClick={() => setShowDetails(!showDetails)}
+          className="cursor-pointer text-left"
+        >
+          {headline &&
+            (showDetails
+              ? t("ERROR_MESSAGE$HIDE_DETAILS")
+              : t("ERROR_MESSAGE$SHOW_DETAILS"))}
+        </button>
+        {showDetails && <p className="text-neutral-300">{details}</p>}
       </div>
     </div>
   );
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 36e6e473021..ccdff694e87 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -1,4 +1,8 @@
-import { addAssistantMessage, addUserMessage, addErrorMessage } from "#/state/chatSlice";
+import {
+  addAssistantMessage,
+  addUserMessage,
+  addErrorMessage,
+} from "#/state/chatSlice";
 import { setCode, setActiveFilepath } from "#/state/codeSlice";
 import { appendJupyterInput } from "#/state/jupyterSlice";
 import {
@@ -119,13 +123,13 @@ export function handleActionMessage(message: ActionMessage) {
 }
 
 export function handleStatusMessage(message: StatusMessage) {
-  if (message.type === 'info') {
+  if (message.type === "info") {
     store.dispatch(
       setCurStatusMessage({
         ...message,
       }),
     );
-  } else if (message.type === 'error') {
+  } else if (message.type === "error") {
     store.dispatch(
       addErrorMessage({
         ...message,
diff --git a/frontend/src/state/chatSlice.ts b/frontend/src/state/chatSlice.ts
index 375e3ba260f..996d731a11b 100644
--- a/frontend/src/state/chatSlice.ts
+++ b/frontend/src/state/chatSlice.ts
@@ -39,7 +39,7 @@ export const chatSlice = createSlice({
 
     addErrorMessage(
       state,
-      action: PayloadAction<{ id: string, message: string }>,
+      action: PayloadAction<{ id: string; message: string }>,
     ) {
       const { id, message } = action.payload;
       state.messages.push({ id, message, error: true });

From 1002d44ade6843dedfe1eabca983166b739fbb5f Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 11:55:18 -0400
Subject: [PATCH 45/60] change test name

---
 tests/unit/test_agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index e6de80d274b..751bf6231bb 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -97,7 +97,7 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream)
 
 
 @pytest.mark.asyncio
-async def test__react_to_error(mock_agent, mock_event_stream):
+async def test_react_to_exception(mock_agent, mock_event_stream):
     controller = AgentController(
         agent=mock_agent,
         event_stream=mock_event_stream,

From 4f69d385abcdaa2f136443f3c8da0661b6e033bb Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 12:55:16 -0400
Subject: [PATCH 46/60] lint

---
 frontend/src/components/chat/message.d.ts | 3 ++-
 frontend/src/routes/_oh.app.tsx           | 2 +-
 frontend/src/state/statusSlice.ts         | 6 ++++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/frontend/src/components/chat/message.d.ts b/frontend/src/components/chat/message.d.ts
index e7248fbd648..6dd3181e93c 100644
--- a/frontend/src/components/chat/message.d.ts
+++ b/frontend/src/components/chat/message.d.ts
@@ -6,6 +6,7 @@ type Message = {
 };
 
 type ErrorMessage = {
-  error: string;
+  error: boolean;
+  id: string;
   message: string;
 };
diff --git a/frontend/src/routes/_oh.app.tsx b/frontend/src/routes/_oh.app.tsx
index 9af5ad278bb..b73d4dc2d66 100644
--- a/frontend/src/routes/_oh.app.tsx
+++ b/frontend/src/routes/_oh.app.tsx
@@ -196,7 +196,7 @@ function App() {
         }),
       );
     } else {
-      dispatch(addErrorMessage({ error, message: details }));
+      dispatch(addErrorMessage({ message: details }));
     }
   };
 
diff --git a/frontend/src/state/statusSlice.ts b/frontend/src/state/statusSlice.ts
index b0b503d6c6b..6f5158c9f6d 100644
--- a/frontend/src/state/statusSlice.ts
+++ b/frontend/src/state/statusSlice.ts
@@ -2,8 +2,10 @@ import { createSlice, PayloadAction } from "@reduxjs/toolkit";
 import { StatusMessage } from "#/types/Message";
 
 const initialStatusMessage: StatusMessage = {
-  status: "",
-  is_error: false,
+  status_update: true,
+  type: "info",
+  id: "",
+  message: "",
 };
 
 export const statusSlice = createSlice({

From 5efc862d215a28bf18698d3004d281232bff2e84 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 14:04:04 -0400
Subject: [PATCH 47/60] fix tests

---
 frontend/__tests__/components/chat/chat-interface.test.tsx | 4 ++--
 frontend/src/components/chat/message.d.ts                  | 2 +-
 frontend/src/components/error-message.tsx                  | 4 ++--
 frontend/src/routes/_oh.app.tsx                            | 1 -
 frontend/src/state/chatSlice.ts                            | 2 +-
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/frontend/__tests__/components/chat/chat-interface.test.tsx b/frontend/__tests__/components/chat/chat-interface.test.tsx
index d27897c2d8e..501389f9897 100644
--- a/frontend/__tests__/components/chat/chat-interface.test.tsx
+++ b/frontend/__tests__/components/chat/chat-interface.test.tsx
@@ -128,14 +128,14 @@ describe.skip("ChatInterface", () => {
         timestamp: new Date().toISOString(),
       },
       {
-        error: "Woops!",
+        error: true,
+        id: "",
         message: "Something went wrong",
       },
     ];
     renderChatInterface(messages);
 
     const error = screen.getByTestId("error-message");
-    expect(within(error).getByText("Woops!")).toBeInTheDocument();
     expect(within(error).getByText("Something went wrong")).toBeInTheDocument();
   });
 
diff --git a/frontend/src/components/chat/message.d.ts b/frontend/src/components/chat/message.d.ts
index 6dd3181e93c..b2ccf43c994 100644
--- a/frontend/src/components/chat/message.d.ts
+++ b/frontend/src/components/chat/message.d.ts
@@ -7,6 +7,6 @@ type Message = {
 
 type ErrorMessage = {
   error: boolean;
-  id: string;
+  id?: string;
   message: string;
 };
diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx
index 88b02aae15c..75165a9a60f 100644
--- a/frontend/src/components/error-message.tsx
+++ b/frontend/src/components/error-message.tsx
@@ -2,7 +2,7 @@ import { useState, useEffect } from "react";
 import { useTranslation } from "react-i18next";
 
 interface ErrorMessageProps {
-  id: string;
+  id?: string;
   message: string;
 }
 
@@ -13,7 +13,7 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) {
   const [details, setDetails] = useState(message);
 
   useEffect(() => {
-    if (i18n.exists(id)) {
+    if (id && i18n.exists(id)) {
       setHeadline(t(id));
       setDetails(message);
       setShowDetails(false);
diff --git a/frontend/src/routes/_oh.app.tsx b/frontend/src/routes/_oh.app.tsx
index b73d4dc2d66..cb1ef3b9c7e 100644
--- a/frontend/src/routes/_oh.app.tsx
+++ b/frontend/src/routes/_oh.app.tsx
@@ -191,7 +191,6 @@ function App() {
     if (!details) {
       dispatch(
         addErrorMessage({
-          error: "An error has occured",
           message: error,
         }),
       );
diff --git a/frontend/src/state/chatSlice.ts b/frontend/src/state/chatSlice.ts
index 996d731a11b..7d77901fee7 100644
--- a/frontend/src/state/chatSlice.ts
+++ b/frontend/src/state/chatSlice.ts
@@ -39,7 +39,7 @@ export const chatSlice = createSlice({
 
     addErrorMessage(
       state,
-      action: PayloadAction<{ id: string; message: string }>,
+      action: PayloadAction<{ id?: string; message: string }>,
     ) {
       const { id, message } = action.payload;
       state.messages.push({ id, message, error: true });

From 75d8028fe6bca9258e2f42450a941f8e0cdccd55 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 14:22:18 -0400
Subject: [PATCH 48/60] fix some tests

---
 openhands/controller/agent_controller.py | 12 +++---------
 tests/unit/test_agent_controller.py      | 11 +++++------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 35a5c4a5bbd..3027f57a8d5 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -145,11 +145,8 @@ async def update_state_after_step(self):
     async def _react_to_exception(
         self,
         e: Exception,
-        new_state: AgentState = AgentState.ERROR,
     ):
-        if new_state is not None:
-            # it's important to set the state before adding the error event, so that metrics sync properly
-            await self.set_agent_state_to(new_state)
+        await self.set_agent_state_to(AgentState.ERROR)
         if self._status_callback is not None:
             err_id = ''
             if isinstance(e, litellm.AuthenticationError):
@@ -541,14 +538,11 @@ async def _handle_traffic_control(
                 await self._react_to_exception(e)
             else:
                 e = RuntimeError(
-                    f'Agent reached maximum {limit_type}, task paused. '
+                    f'Agent reached maximum {limit_type}. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                 )
                 # FIXME: this isn't really an exception--we should have a different path
-                await self._react_to_exception(
-                    e,
-                    new_state=AgentState.PAUSED,
-                )
+                await self._react_to_exception(e)
             stop_step = True
         return stop_step
 
diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 751bf6231bb..79ba0ecdbd1 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -7,7 +7,6 @@
 from openhands.controller.agent_controller import AgentController
 from openhands.controller.state.state import TrafficControlState
 from openhands.core.config import AppConfig
-from openhands.core.exceptions import LLMMalformedActionError
 from openhands.core.main import run_controller
 from openhands.core.schema import AgentState
 from openhands.events import Event, EventSource, EventStream, EventStreamSubscriber
@@ -107,7 +106,7 @@ async def test_react_to_exception(mock_agent, mock_event_stream):
         headless_mode=True,
     )
     error_message = 'Test error'
-    await controller._react_to_error(error_message)
+    await controller._react_to_exception(error_message)
     assert controller.state.last_error == error_message
     controller.event_stream.add_event.assert_called_once()
     await controller.close()
@@ -124,12 +123,12 @@ async def test_step_with_exception(mock_agent, mock_event_stream):
         headless_mode=True,
     )
     controller.state.agent_state = AgentState.RUNNING
-    controller._react_to_error = AsyncMock()
-    controller.agent.step.side_effect = LLMMalformedActionError('Malformed action')
+    controller._react_to_exception = AsyncMock()
+    controller.agent.step.side_effect = RuntimeError('Test error')
     await controller._step()
 
-    # Verify that _react_to_error was called with the correct error message
-    controller._react_to_error.assert_called_once_with('Malformed action')
+    # Verify that _react_to_exception was called with the correct error message
+    controller._react_to_exception.assert_called_once_with('')
     await controller.close()
 
 

From fa25ad1b246d326ff8b2dd4d7755b35efcce48f7 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 14:24:22 -0400
Subject: [PATCH 49/60] better names

---
 tests/unit/test_agent_controller.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 79ba0ecdbd1..2cc43f9a724 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -146,14 +146,14 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream):
     agent.llm.metrics = Metrics()
     agent.llm.config = config.get_llm_config()
 
-    fatal_error_obs = ErrorObservation('Fatal error detected', fatal=True)
-    fatal_error_obs._cause = event.id
+    error_obs = ErrorObservation('You messed around with Jim')
+    error_obs._cause = event.id
 
     runtime = MagicMock(spec=Runtime)
 
     async def on_event(event: Event):
         if isinstance(event, CmdRunAction):
-            await event_stream.async_add_event(fatal_error_obs, EventSource.USER)
+            await event_stream.async_add_event(error_obs, EventSource.USER)
 
     event_stream.subscribe(EventStreamSubscriber.RUNTIME, on_event)
     runtime.event_stream = event_stream
@@ -172,10 +172,7 @@ async def on_event(event: Event):
     # it will first become AgentState.ERROR, then become AgentState.STOPPED
     # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
-    assert (
-        state.get_last_error()
-        == 'There was a fatal error during agent execution: **ErrorObservation**\nFatal error detected'
-    )
+    assert state.get_last_error() == 'You messed around with Jim'
     assert len(list(event_stream.get_events())) == 5
 
 

From 0a5e90028afc0fe7b3365789e8d6d2fe35f5d9cb Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 14:25:16 -0400
Subject: [PATCH 50/60] fix message

---
 tests/unit/test_agent_controller.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 2cc43f9a724..c4f9b97b0ad 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -242,10 +242,7 @@ async def on_event(event: Event):
     # it will first become AgentState.ERROR, then become AgentState.STOPPED
     # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
-    assert (
-        state.last_error
-        == 'There was a fatal error during agent execution: **FatalErrorObservation**\nAgent got stuck in a loop'
-    )
+    assert state.last_error == 'Agent got stuck in a loop'
 
 
 @pytest.mark.asyncio

From 019aed60937fa07914c5290bff7003dcaf1dd019 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 14:34:54 -0400
Subject: [PATCH 51/60] fix more tests

---
 tests/unit/test_agent_controller.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index c4f9b97b0ad..25a1124472f 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -43,6 +43,11 @@ def mock_event_stream():
     return MagicMock(spec=EventStream)
 
 
+@pytest.fixture
+def mock_status_callback():
+    return AsyncMock()
+
+
 @pytest.mark.asyncio
 async def test_set_agent_state(mock_agent, mock_event_stream):
     controller = AgentController(
@@ -96,19 +101,19 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream)
 
 
 @pytest.mark.asyncio
-async def test_react_to_exception(mock_agent, mock_event_stream):
+async def test_react_to_exception(mock_agent, mock_event_stream, mock_status_callback):
     controller = AgentController(
         agent=mock_agent,
         event_stream=mock_event_stream,
+        status_callback=mock_status_callback,
         max_iterations=10,
         sid='test',
         confirmation_mode=False,
         headless_mode=True,
     )
     error_message = 'Test error'
-    await controller._react_to_exception(error_message)
-    assert controller.state.last_error == error_message
-    controller.event_stream.add_event.assert_called_once()
+    await controller._react_to_exception(RuntimeError(error_message))
+    controller._status_callback.assert_called_once()
     await controller.close()
 
 
@@ -242,7 +247,7 @@ async def on_event(event: Event):
     # it will first become AgentState.ERROR, then become AgentState.STOPPED
     # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
-    assert state.last_error == 'Agent got stuck in a loop'
+    assert state.get_last_error() == 'Agent got stuck in a loop'
 
 
 @pytest.mark.asyncio

From 0793e9fc9dafdd13e66d0f561c37fc6660986412 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 14:48:44 -0400
Subject: [PATCH 52/60] remove redundant 200 checks

---
 .../impl/eventstream/eventstream_runtime.py   | 52 ++++---------------
 1 file changed, 10 insertions(+), 42 deletions(-)

diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index bef8243303d..ddc47d0c66a 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -388,18 +388,12 @@ def _wait_until_alive(self):
         if not self.log_buffer:
             raise RuntimeError('Runtime client is not ready.')
 
-        response = send_request(
+        send_request(
             self.session,
             'GET',
             f'{self.api_url}/alive',
             timeout=5,
         )
-        if response.status_code == 200:
-            return
-        else:
-            msg = f'Action execution API is not alive. Response: {response}'
-            self.log('error', msg)
-            raise RuntimeError(msg)
 
     def close(self, rm_all_containers: bool = True):
         """Closes the EventStreamRuntime and associated objects
@@ -488,18 +482,9 @@ def run_action(self, action: Action) -> Observation:
                     json={'action': event_to_dict(action)},
                     timeout=action.timeout,
                 )
-                if response.status_code == 200:
-                    output = response.json()
-                    obs = observation_from_dict(output)
-                    obs._cause = action.id  # type: ignore[attr-defined]
-                else:
-                    self.log('debug', f'action: {action}')
-                    self.log('debug', f'response: {response}')
-                    error_message = response.text
-                    self.log('error', f'Error from server: {error_message}')
-                    obs = ErrorObservation(
-                        f'Action execution failed: {error_message}',
-                    )
+                output = response.json()
+                obs = observation_from_dict(output)
+                obs._cause = action.id  # type: ignore[attr-defined]
             except requests.Timeout:
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
@@ -561,7 +546,7 @@ def copy_to(
 
             params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()}
 
-            response = send_request(
+            send_request(
                 self.session,
                 'POST',
                 f'{self.api_url}/upload_file',
@@ -569,11 +554,6 @@ def copy_to(
                 params=params,
                 timeout=300,
             )
-            if response.status_code == 200:
-                return
-            else:
-                error_message = response.text
-                raise Exception(f'Copy operation failed: {error_message}')
 
         except requests.Timeout:
             raise TimeoutError('Copy operation timed out')
@@ -605,17 +585,11 @@ def list_files(self, path: str | None = None) -> list[str]:
                 json=data,
                 timeout=10,
             )
-            if response.status_code == 200:
-                response_json = response.json()
-                assert isinstance(response_json, list)
-                return response_json
-            else:
-                error_message = response.text
-                raise Exception(f'List files operation failed: {error_message}')
+            response_json = response.json()
+            assert isinstance(response_json, list)
+            return response_json
         except requests.Timeout:
             raise TimeoutError('List files operation timed out')
-        except Exception as e:
-            raise RuntimeError(f'List files operation failed: {str(e)}')
 
     def copy_from(self, path: str) -> bytes:
         """Zip all files in the sandbox and return as a stream of bytes."""
@@ -630,16 +604,10 @@ def copy_from(self, path: str) -> bytes:
                 stream=True,
                 timeout=30,
             )
-            if response.status_code == 200:
-                data = response.content
-                return data
-            else:
-                error_message = response.text
-                raise Exception(f'Copy operation failed: {error_message}')
+            data = response.content
+            return data
         except requests.Timeout:
             raise TimeoutError('Copy operation timed out')
-        except Exception as e:
-            raise RuntimeError(f'Copy operation failed: {str(e)}')
 
     def _is_port_in_use_docker(self, port):
         containers = self.docker_client.containers.list()

From a021dada688936771afeeffdf4c36beb1e94e3c9 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 15:08:51 -0400
Subject: [PATCH 53/60] fix tests

---
 tests/unit/test_agent_controller.py | 55 +++++++++--------------------
 1 file changed, 17 insertions(+), 38 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index dbd4bb701b5..b4d7fcddd2f 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -117,26 +117,6 @@ async def test_react_to_exception(mock_agent, mock_event_stream, mock_status_cal
     await controller.close()
 
 
-@pytest.mark.asyncio
-async def test_step_with_exception(mock_agent, mock_event_stream):
-    controller = AgentController(
-        agent=mock_agent,
-        event_stream=mock_event_stream,
-        max_iterations=10,
-        sid='test',
-        confirmation_mode=False,
-        headless_mode=True,
-    )
-    controller.state.agent_state = AgentState.RUNNING
-    controller._react_to_exception = AsyncMock()
-    controller.agent.step.side_effect = RuntimeError('Test error')
-    await controller._step()
-
-    # Verify that _react_to_exception was called with the correct error message
-    controller._react_to_exception.assert_called_once_with('')
-    await controller.close()
-
-
 @pytest.mark.asyncio
 async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream):
     config = AppConfig()
@@ -144,20 +124,23 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream):
     event_stream = EventStream(sid='test', file_store=file_store)
 
     agent = MagicMock(spec=Agent)
-    # a random message to send to the runtime
-    event = CmdRunAction(command='ls')
-    agent.step.return_value = event
+    agent = MagicMock(spec=Agent)
+
+    def agent_step_fn(state):
+        print(f'agent_step_fn received state: {state}')
+        return CmdRunAction(command='ls')
+
+    agent.step = agent_step_fn
     agent.llm = MagicMock(spec=LLM)
     agent.llm.metrics = Metrics()
     agent.llm.config = config.get_llm_config()
 
-    error_obs = ErrorObservation('You messed around with Jim')
-    error_obs._cause = event.id
-
     runtime = MagicMock(spec=Runtime)
 
     async def on_event(event: Event):
         if isinstance(event, CmdRunAction):
+            error_obs = ErrorObservation('You messed around with Jim')
+            error_obs._cause = event.id
             await event_stream.async_add_event(error_obs, EventSource.USER)
 
     event_stream.subscribe(EventStreamSubscriber.RUNTIME, on_event)
@@ -173,27 +156,25 @@ async def on_event(event: Event):
     )
     print(f'state: {state}')
     print(f'event_stream: {list(event_stream.get_events())}')
-    assert state.iteration == 1
+    assert state.iteration == 4
     # it will first become AgentState.ERROR, then become AgentState.STOPPED
     # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
     assert state.get_last_error() == 'You messed around with Jim'
-    assert len(list(event_stream.get_events())) == 5
+    assert len(list(event_stream.get_events())) == 12
 
 
 @pytest.mark.asyncio
-async def test_run_controller_stop_with_stuck(mock_agent, mock_event_stream):
+async def test_run_controller_stop_with_stuck():
     config = AppConfig()
     file_store = get_file_store(config.file_store, config.file_store_path)
     event_stream = EventStream(sid='test', file_store=file_store)
 
     agent = MagicMock(spec=Agent)
-    # a random message to send to the runtime
-    event = CmdRunAction(command='ls')
 
     def agent_step_fn(state):
         print(f'agent_step_fn received state: {state}')
-        return event
+        return CmdRunAction(command='ls')
 
     agent.step = agent_step_fn
     agent.llm = MagicMock(spec=LLM)
@@ -243,13 +224,11 @@ async def on_event(event: Event):
             and observation_dict['content'] == 'Non fatal error here to trigger loop'
         )
     last_event = event_to_dict(events[-1])
-    assert last_event['extras']['agent_state'] == 'error'
+    assert last_event['extras']['agent_state'] == 'stopped'
     assert last_event['observation'] == 'agent_state_changed'
 
-    # it will first become AgentState.ERROR, then become AgentState.STOPPED
-    # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
-    assert state.get_last_error() == 'Agent got stuck in a loop'
+    assert state.get_last_error() == 'Non fatal error here to trigger loop'
 
 
 @pytest.mark.asyncio
@@ -316,7 +295,7 @@ async def test_step_max_iterations(mock_agent, mock_event_stream):
     assert controller.state.traffic_control_state == TrafficControlState.NORMAL
     await controller._step()
     assert controller.state.traffic_control_state == TrafficControlState.THROTTLING
-    assert controller.state.agent_state == AgentState.PAUSED
+    assert controller.state.agent_state == AgentState.ERROR
     await controller.close()
 
 
@@ -356,7 +335,7 @@ async def test_step_max_budget(mock_agent, mock_event_stream):
     assert controller.state.traffic_control_state == TrafficControlState.NORMAL
     await controller._step()
     assert controller.state.traffic_control_state == TrafficControlState.THROTTLING
-    assert controller.state.agent_state == AgentState.PAUSED
+    assert controller.state.agent_state == AgentState.ERROR
     await controller.close()
 
 

From a08879f3140ec0e2564d8e9098a7a7c3f6d1ce24 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 18:43:33 -0400
Subject: [PATCH 54/60]  add stacklevel

---
 openhands/controller/agent_controller.py | 2 +-
 openhands/runtime/base.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 84105685225..a0b6f2c5a8c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -132,7 +132,7 @@ def log(self, level: str, message: str, extra: dict | None = None):
             message (str): The message to log.
         """
         message = f'[Agent Controller {self.id}] {message}'
-        getattr(logger, level)(message, extra=extra)
+        getattr(logger, level)(message, extra=extra, stacklevel=2)
 
     def update_state_before_step(self):
         self.state.iteration += 1
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 3228b2272e7..2d8b02cd2dd 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -113,7 +113,7 @@ def close(self) -> None:
 
     def log(self, level: str, message: str) -> None:
         message = f'[runtime {self.sid}] {message}'
-        getattr(logger, level)(message)
+        getattr(logger, level)(message, stacklevel=2)
 
     def send_status_message(self, message_id: str):
         """Sends a status message if the callback function was provided."""

From 98e25d12d4021ad79110276629e4649d82f82dd1 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 18:43:40 -0400
Subject: [PATCH 55/60] add debug info

---
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index ddc47d0c66a..645be31ecf4 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -475,6 +475,7 @@ def run_action(self, action: Action) -> Observation:
             assert action.timeout is not None
 
             try:
+                print('SEND ACTION', event_to_dict(action))
                 response = send_request(
                     self.session,
                     'POST',
@@ -483,13 +484,19 @@ def run_action(self, action: Action) -> Observation:
                     timeout=action.timeout,
                 )
                 output = response.json()
+                print('GOT OUTPUT', output)
                 obs = observation_from_dict(output)
                 obs._cause = action.id  # type: ignore[attr-defined]
             except requests.Timeout:
+                print('EXCEPTION TIMEOUT')
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'Action execution timed out after {action.timeout} seconds.',
                 )
+            except Exception as e:
+                print('OTHER EXCEPTION', e)
+                raise e
+
             self._refresh_logs()
             return obs
 

From 0e874153c258f0a67eb35732bfd4ba205f26d4c1 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 18:46:51 -0400
Subject: [PATCH 56/60] fix timeout error

---
 openhands/runtime/utils/request.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/runtime/utils/request.py b/openhands/runtime/utils/request.py
index 3b654c7aca4..27b90072b9f 100644
--- a/openhands/runtime/utils/request.py
+++ b/openhands/runtime/utils/request.py
@@ -57,7 +57,8 @@ def send_request(
     timeout: int = 10,
     **kwargs: Any,
 ) -> requests.Response:
-    kwargs['timeout'] = timeout
+    # wait a few more seconds to get the timeout error from client side
+    kwargs['timeout'] = timeout + 5
     response = session.request(method, url, **kwargs)
     response.raise_for_status()
     return response

From 6cc2cac4b796d164ee01fe21a10ed8439e597b7c Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 18:47:00 -0400
Subject: [PATCH 57/60] Revert "add debug info"

This reverts commit 98e25d12d4021ad79110276629e4649d82f82dd1.
---
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 645be31ecf4..ddc47d0c66a 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -475,7 +475,6 @@ def run_action(self, action: Action) -> Observation:
             assert action.timeout is not None
 
             try:
-                print('SEND ACTION', event_to_dict(action))
                 response = send_request(
                     self.session,
                     'POST',
@@ -484,19 +483,13 @@ def run_action(self, action: Action) -> Observation:
                     timeout=action.timeout,
                 )
                 output = response.json()
-                print('GOT OUTPUT', output)
                 obs = observation_from_dict(output)
                 obs._cause = action.id  # type: ignore[attr-defined]
             except requests.Timeout:
-                print('EXCEPTION TIMEOUT')
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'Action execution timed out after {action.timeout} seconds.',
                 )
-            except Exception as e:
-                print('OTHER EXCEPTION', e)
-                raise e
-
             self._refresh_logs()
             return obs
 

From 4f84a5029634aae42f2991ca9a3816903888b567 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 19:16:44 -0400
Subject: [PATCH 58/60] add ids to ErrorObservation

---
 frontend/src/components/error-message.tsx | 21 +++++++++++----------
 frontend/src/routes/_oh.app.tsx           | 21 ++++++---------------
 openhands/events/observation/error.py     |  1 +
 3 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx
index 75165a9a60f..d2624864473 100644
--- a/frontend/src/components/error-message.tsx
+++ b/frontend/src/components/error-message.tsx
@@ -24,16 +24,17 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) {
     <div className="flex gap-2 items-center justify-start border-l-2 border-danger pl-2 my-2 py-2">
       <div className="text-sm leading-4 flex flex-col gap-2">
         {headline && <p className="text-danger font-bold">{headline}</p>}
-        <button
-          type="button"
-          onClick={() => setShowDetails(!showDetails)}
-          className="cursor-pointer text-left"
-        >
-          {headline &&
-            (showDetails
-              ? t("ERROR_MESSAGE$HIDE_DETAILS")
-              : t("ERROR_MESSAGE$SHOW_DETAILS"))}
-        </button>
+        {headline && (
+          <button
+            type="button"
+            onClick={() => setShowDetails(!showDetails)}
+            className="cursor-pointer text-left"
+          >
+          {showDetails
+            ? t("ERROR_MESSAGE$HIDE_DETAILS")
+            : t("ERROR_MESSAGE$SHOW_DETAILS")}
+          </button>
+        )}
         {showDetails && <p className="text-neutral-300">{details}</p>}
       </div>
     </div>
diff --git a/frontend/src/routes/_oh.app.tsx b/frontend/src/routes/_oh.app.tsx
index cb1ef3b9c7e..7a4aaa70cc9 100644
--- a/frontend/src/routes/_oh.app.tsx
+++ b/frontend/src/routes/_oh.app.tsx
@@ -185,20 +185,6 @@ function App() {
     if (q) addIntialQueryToChat(q, files);
   }, [settings]);
 
-  const handleError = (message: string) => {
-    const [error, ...rest] = message.split(":");
-    const details = rest.join(":");
-    if (!details) {
-      dispatch(
-        addErrorMessage({
-          message: error,
-        }),
-      );
-    } else {
-      dispatch(addErrorMessage({ message: details }));
-    }
-  };
-
   const handleMessage = React.useCallback(
     (message: MessageEvent<WebSocket.Data>) => {
       // set token received from the server
@@ -224,7 +210,12 @@ function App() {
         return;
       }
       if (isErrorObservation(parsed)) {
-        handleError(parsed.message);
+        dispatch(
+          addErrorMessage({
+            id: parsed.extras?.error_id,
+            message: parsed.message,
+          }),
+        );
         return;
       }
 
diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py
index c6139e86c0b..4ed05b89ac7 100644
--- a/openhands/events/observation/error.py
+++ b/openhands/events/observation/error.py
@@ -13,6 +13,7 @@ class ErrorObservation(Observation):
     """
 
     observation: str = ObservationType.ERROR
+    error_id: str = ''
 
     @property
     def message(self) -> str:

From 88a0eb4d776435ec2250b9de87419594e6a09a52 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 19:19:27 -0400
Subject: [PATCH 59/60] add some error_ids

---
 frontend/src/i18n/translation.json                        | 6 ++++++
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 3 +++
 openhands/runtime/impl/remote/remote_runtime.py           | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json
index ab73c4f86e2..6db2520b6b8 100644
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -1522,5 +1522,11 @@
   },
   "STATUS$ERROR_RUNTIME_DISCONNECTED": {
     "en": "There was an error while connecting to the runtime. Please refresh the page."
+  },
+  "AGENT_ERROR$BAD_ACTION": {
+    "en": "Agent tried to execute a malformed action."
+  },
+  "AGENT_ERROR$ACTION_TIMEOUT": {
+    "en": "Action timed out."
   }
 }
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index ddc47d0c66a..a04fb1381d1 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -457,10 +457,12 @@ def run_action(self, action: Action) -> Observation:
             if action_type not in ACTION_TYPE_TO_CLASS:
                 return ErrorObservation(
                     f'Action {action_type} does not exist.',
+                    error_id='AGENT_ERROR$BAD_ACTION',
                 )
             if not hasattr(self, action_type):
                 return ErrorObservation(
                     f'Action {action_type} is not supported in the current runtime.',
+                    error_id='AGENT_ERROR$BAD_ACTION',
                 )
             if (
                 getattr(action, 'confirmation_state', None)
@@ -489,6 +491,7 @@ def run_action(self, action: Action) -> Observation:
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'Action execution timed out after {action.timeout} seconds.',
+                    error_id='AGENT_ERROR$ACTION_TIMEOUT',
                 )
             self._refresh_logs()
             return obs
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index b47f48fb991..d290ba55c65 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -327,10 +327,12 @@ def run_action(self, action: Action) -> Observation:
             if action_type not in ACTION_TYPE_TO_CLASS:
                 return ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.',
+                    error_id='AGENT_ERROR$BAD_ACTION',
                 )
             if not hasattr(self, action_type):
                 return ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.',
+                    error_id='AGENT_ERROR$BAD_ACTION',
                 )
 
             assert action.timeout is not None
@@ -350,6 +352,7 @@ def run_action(self, action: Action) -> Observation:
             except requests.Timeout:
                 obs = ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action execution timed out',
+                    error_id='AGENT_ERROR$ACTION_TIMEOUT',
                 )
                 return obs
             return obs

From 2128d0bdfccf33618ad4b47cc8cfd643fa76e457 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Thu, 31 Oct 2024 19:31:41 -0400
Subject: [PATCH 60/60] fix type error

---
 frontend/src/components/error-message.tsx | 6 +++---
 frontend/src/types/core/observations.ts   | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx
index d2624864473..86454539f77 100644
--- a/frontend/src/components/error-message.tsx
+++ b/frontend/src/components/error-message.tsx
@@ -30,9 +30,9 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) {
             onClick={() => setShowDetails(!showDetails)}
             className="cursor-pointer text-left"
           >
-          {showDetails
-            ? t("ERROR_MESSAGE$HIDE_DETAILS")
-            : t("ERROR_MESSAGE$SHOW_DETAILS")}
+            {showDetails
+              ? t("ERROR_MESSAGE$HIDE_DETAILS")
+              : t("ERROR_MESSAGE$SHOW_DETAILS")}
           </button>
         )}
         {showDetails && <p className="text-neutral-300">{details}</p>}
diff --git a/frontend/src/types/core/observations.ts b/frontend/src/types/core/observations.ts
index 9de2a70e8b1..21bafddf21d 100644
--- a/frontend/src/types/core/observations.ts
+++ b/frontend/src/types/core/observations.ts
@@ -54,6 +54,9 @@ export interface BrowseObservation extends OpenHandsObservationEvent<"browse"> {
 
 export interface ErrorObservation extends OpenHandsObservationEvent<"error"> {
   source: "user";
+  extras: {
+    error_id?: string;
+  };
 }
 
 export type OpenHandsObservation =