From 1dd8c3fdb100e13068d83dc5de9fc65529bca464 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:15:09 -0400 Subject: [PATCH 01/60] prevent evenstream from looping back on itself --- openhands/events/stream.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/openhands/events/stream.py b/openhands/events/stream.py index 1e4c3b9d539..cda1578295a 100644 --- a/openhands/events/stream.py +++ b/openhands/events/stream.py @@ -138,6 +138,10 @@ def add_event(self, event: Event, source: EventSource): asyncio.run(self.async_add_event(event, source)) async def async_add_event(self, event: Event, source: EventSource): + if event._id is not None: # type: ignore [attr-defined] + raise ValueError( + 'Event already has an ID. It was probably added back to the EventStream from inside a handler, trigging a loop.' + ) with self._lock: event._id = self._cur_id # type: ignore [attr-defined] self._cur_id += 1 From ba3c558ff7cd2dfb51967857c1c61253d603734e Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:17:42 -0400 Subject: [PATCH 02/60] use hasattr --- openhands/events/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/events/stream.py b/openhands/events/stream.py index cda1578295a..b2368da2030 100644 --- a/openhands/events/stream.py +++ b/openhands/events/stream.py @@ -138,7 +138,7 @@ def add_event(self, event: Event, source: EventSource): asyncio.run(self.async_add_event(event, source)) async def async_add_event(self, event: Event, source: EventSource): - if event._id is not None: # type: ignore [attr-defined] + if hasattr(event, '_id') and event.id is not None: raise ValueError( 'Event already has an ID. It was probably added back to the EventStream from inside a handler, trigging a loop.' ) From 1b72a8966b406983c0ca9d45daf0355a364fcd62 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:30:56 -0400 Subject: [PATCH 03/60] refactor a bit --- evaluation/EDA/run_infer.py | 2 +- evaluation/agent_bench/run_infer.py | 2 +- evaluation/aider_bench/run_infer.py | 2 +- evaluation/biocoder/run_infer.py | 2 +- evaluation/bird/run_infer.py | 2 +- evaluation/browsing_delegation/run_infer.py | 2 +- evaluation/gaia/run_infer.py | 2 +- evaluation/gorilla/run_infer.py | 2 +- evaluation/gpqa/run_infer.py | 2 +- evaluation/humanevalfix/run_infer.py | 2 +- evaluation/integration_tests/run_infer.py | 2 +- evaluation/logic_reasoning/run_infer.py | 2 +- evaluation/miniwob/run_infer.py | 2 +- evaluation/mint/run_infer.py | 2 +- evaluation/swe_bench/run_infer.py | 8 ++-- .../swe_bench/scripts/eval/compare_outputs.py | 0 .../scripts/eval/convert_oh_output_to_md.py | 0 .../scripts/eval/summarize_outputs.py | 0 .../scripts/setup/compare_patch_filename.py | 0 evaluation/toolqa/run_infer.py | 2 +- evaluation/webarena/run_infer.py | 2 +- openhands/controller/agent_controller.py | 38 ++++++++----------- openhands/controller/state/state.py | 13 +++++-- 23 files changed, 44 insertions(+), 47 deletions(-) mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/compare_outputs.py mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/summarize_outputs.py mode change 100755 => 100644 evaluation/swe_bench/scripts/setup/compare_patch_filename.py diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py index 2c896939a75..c4f6c118411 100644 --- a/evaluation/EDA/run_infer.py +++ b/evaluation/EDA/run_infer.py @@ -158,7 +158,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'success': test_result, 'final_message': final_message, diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py index d6fcc62e079..e05c66e9135 100644 --- a/evaluation/agent_bench/run_infer.py +++ b/evaluation/agent_bench/run_infer.py @@ -283,7 +283,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'agent_answer': agent_answer, 'final_answer': final_ans, diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py index fa1bb9534a8..9b7f33d404c 100644 --- a/evaluation/aider_bench/run_infer.py +++ b/evaluation/aider_bench/run_infer.py @@ -261,7 +261,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result, ) return output diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py index 4535ccba4e4..a05a6aa83aa 100644 --- a/evaluation/biocoder/run_infer.py +++ b/evaluation/biocoder/run_infer.py @@ -311,7 +311,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result, ) return output diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py index adb498cd2eb..d81e873636c 100644 --- a/evaluation/bird/run_infer.py +++ b/evaluation/bird/run_infer.py @@ -440,7 +440,7 @@ def execute_sql(db_path, sql): metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result, ) return output diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py index c9fe2ebd18b..38c91cae18d 100644 --- a/evaluation/browsing_delegation/run_infer.py +++ b/evaluation/browsing_delegation/run_infer.py @@ -121,7 +121,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'query': instance.instruction, 'action': last_delegate_action, diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py index c02cd0aee73..d72a8a449ea 100644 --- a/evaluation/gaia/run_infer.py +++ b/evaluation/gaia/run_infer.py @@ -213,7 +213,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result, ) return output diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py index 873cb7f8969..9881cea88c9 100644 --- a/evaluation/gorilla/run_infer.py +++ b/evaluation/gorilla/run_infer.py @@ -121,7 +121,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'text': model_answer_raw, 'correct': correct, diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py index 8fd4034c9d5..81f09ef5223 100644 --- a/evaluation/gpqa/run_infer.py +++ b/evaluation/gpqa/run_infer.py @@ -302,7 +302,7 @@ def process_instance( metadata=metadata, history=state.history.compatibility_for_eval_history_pairs(), metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'result': test_result, 'found_answers': found_answers, diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py index 25fee65561f..ae445faaa4a 100644 --- a/evaluation/humanevalfix/run_infer.py +++ b/evaluation/humanevalfix/run_infer.py @@ -264,7 +264,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result, ) return output diff --git a/evaluation/integration_tests/run_infer.py b/evaluation/integration_tests/run_infer.py index a530041f92f..95dbb447841 100644 --- a/evaluation/integration_tests/run_infer.py +++ b/evaluation/integration_tests/run_infer.py @@ -134,7 +134,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result.model_dump(), ) return output diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py index 5b7d35f2113..57e0ec9582f 100644 --- a/evaluation/logic_reasoning/run_infer.py +++ b/evaluation/logic_reasoning/run_infer.py @@ -256,7 +256,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result=test_result, ) return output diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index 9c2aaf1e096..58fc9da6cc1 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -173,7 +173,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'reward': reward, }, diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py index 8017b194d8d..554940ebaad 100644 --- a/evaluation/mint/run_infer.py +++ b/evaluation/mint/run_infer.py @@ -212,7 +212,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'success': task_state.success if task_state else False, }, diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 9ac1e0cf663..8ca66056e65 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -402,10 +402,10 @@ def process_instance( # if fatal error, throw EvalError to trigger re-run if ( - state.last_error - and 'fatal error during agent execution' in state.last_error + state.get_last_error() + and 'fatal error during agent execution' in state.get_last_error() ): - raise EvalException('Fatal error detected: ' + state.last_error) + raise EvalException('Fatal error detected: ' + state.get_last_error()) # ======= THIS IS SWE-Bench specific ======= # Get git patch @@ -442,7 +442,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, ) return output diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py old mode 100755 new mode 100644 diff --git a/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py b/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py old mode 100755 new mode 100644 diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py old mode 100755 new mode 100644 diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py old mode 100755 new mode 100644 diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py index 5c2c5342278..c10d19fdf13 100644 --- a/evaluation/toolqa/run_infer.py +++ b/evaluation/toolqa/run_infer.py @@ -149,7 +149,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, ) return output diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py index cfc2bdae493..d818f77307a 100644 --- a/evaluation/webarena/run_infer.py +++ b/evaluation/webarena/run_infer.py @@ -187,7 +187,7 @@ def process_instance( metadata=metadata, history=histories, metrics=metrics, - error=state.last_error if state and state.last_error else None, + error=state.get_last_error() if state and state.get_last_error() else None, test_result={ 'reward': reward, }, diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 946f6a7d327..0e921508f9c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -133,21 +133,14 @@ async def update_state_after_step(self): # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset() self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics) - async def report_error(self, message: str, exception: Exception | None = None): - """Reports an error to the user and sends the exception to the LLM next step, in the hope it can self-correct. - - This method should be called for a particular type of errors, which have: - - a user-friendly message, which will be shown in the chat box. This should not be a raw exception message. - - an ErrorObservation that can be sent to the LLM by the user role, with the exception message, so it can self-correct next time. - """ - self.state.last_error = message - if exception: - self.state.last_error += f': {exception}' + async def send_error_to_event_stream( + self, message: str, exception: Exception | None = None + ): detail = str(exception) if exception is not None else '' if exception is not None and isinstance(exception, litellm.AuthenticationError): - detail = 'Please check your credentials. Is your API key correct?' + detail += '\nPlease check your credentials. Is your API key correct?' self.event_stream.add_event( - ErrorObservation(f'{message}:{detail}'), EventSource.USER + ErrorObservation(f'{message}:{detail}'), EventSource.AGENT ) async def start_step_loop(self): @@ -164,7 +157,7 @@ async def start_step_loop(self): traceback.print_exc() logger.error(f'Error while running the agent: {e}') logger.error(traceback.format_exc()) - await self.report_error( + await self.send_error_to_event_stream( 'There was an unexpected error while running the agent', exception=e ) await self.set_agent_state_to(AgentState.ERROR) @@ -254,9 +247,6 @@ async def _handle_observation(self, observation: Observation): if self.state.agent_state == AgentState.ERROR: self.state.metrics.merge(self.state.local_metrics) elif isinstance(observation, FatalErrorObservation): - await self.report_error( - 'There was a fatal error during agent execution: ' + str(observation) - ) await self.set_agent_state_to(AgentState.ERROR) self.state.metrics.merge(self.state.local_metrics) @@ -443,7 +433,7 @@ async def _step(self) -> None: except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e: # report to the user # and send the underlying exception to the LLM for self-correction - await self.report_error(str(e)) + await self.send_error_to_event_stream(str(e)) return if action.runnable: @@ -468,9 +458,9 @@ async def _step(self) -> None: logger.info(action, extra={'msg_type': 'ACTION'}) if self._is_stuck(): - # This need to go BEFORE report_error to sync metrics + # This need to go BEFORE send_error_to_event_stream to sync metrics await self.set_agent_state_to(AgentState.ERROR) - await self.report_error('Agent got stuck in a loop') + await self.send_error_to_event_stream('Agent got stuck in a loop') async def _delegate_step(self): """Executes a single step of the delegate agent.""" @@ -489,7 +479,9 @@ async def _delegate_step(self): self.delegate = None self.delegateAction = None - await self.report_error('Delegator agent encountered an error') + await self.send_error_to_event_stream( + 'Delegator agent encountered an error' + ) elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED): logger.info( f'[Agent Controller {self.id}] Delegate agent has finished execution' @@ -538,17 +530,17 @@ async def _handle_traffic_control( else: self.state.traffic_control_state = TrafficControlState.THROTTLING if self.headless_mode: - # This need to go BEFORE report_error to sync metrics + # This need to go BEFORE send_error_to_event_stream to sync metrics await self.set_agent_state_to(AgentState.ERROR) # set to ERROR state if running in headless mode # since user cannot resume on the web interface - await self.report_error( + await self.send_error_to_event_stream( f'Agent reached maximum {limit_type} in headless mode, task stopped. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}' ) else: await self.set_agent_state_to(AgentState.PAUSED) - await self.report_error( + await self.send_error_to_event_stream( f'Agent reached maximum {limit_type}, task paused. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' f'{TRAFFIC_CONTROL_REMINDER}' diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index e14d44517a5..d204c9609ec 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -11,6 +11,7 @@ MessageAction, ) from openhands.events.action.agent import AgentFinishAction +from openhands.events.observation import ErrorObservation, FatalErrorObservation from openhands.llm.metrics import Metrics from openhands.memory.history import ShortTermHistory from openhands.storage.files import FileStore @@ -80,7 +81,6 @@ class State: history: ShortTermHistory = field(default_factory=ShortTermHistory) inputs: dict = field(default_factory=dict) outputs: dict = field(default_factory=dict) - last_error: str | None = None agent_state: AgentState = AgentState.LOADING resume_state: AgentState | None = None traffic_control_state: TrafficControlState = TrafficControlState.NORMAL @@ -124,9 +124,6 @@ def restore_from_session(sid: str, file_store: FileStore) -> 'State': else: state.resume_state = None - # don't carry last_error anymore after restore - state.last_error = None - # first state after restore state.agent_state = AgentState.LOADING return state @@ -157,6 +154,14 @@ def __setstate__(self, state): # remove the restored data from the state if any + def get_last_error(self) -> str: + for event in self.history.get_events(reverse=True): + if isinstance(event, ErrorObservation) or isinstance( + event, FatalErrorObservation + ): + return event.content + return '' + def get_current_user_intent(self): """Returns the latest user message and image(if provided) that appears after a FinishAction, or the first (the task) if nothing was finished yet.""" last_user_message = None From ed3d94499ab15ab98e0863d19acdcbe140d83e19 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:31:40 -0400 Subject: [PATCH 04/60] revert perms --- evaluation/swe_bench/scripts/eval/compare_outputs.py | 0 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py | 0 evaluation/swe_bench/scripts/eval/summarize_outputs.py | 0 evaluation/swe_bench/scripts/setup/compare_patch_filename.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/compare_outputs.py mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/summarize_outputs.py mode change 100644 => 100755 evaluation/swe_bench/scripts/setup/compare_patch_filename.py diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py old mode 100644 new mode 100755 diff --git a/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py b/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py old mode 100644 new mode 100755 diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py old mode 100644 new mode 100755 diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py old mode 100644 new mode 100755 From 57662b625819b47f7f1d5e4baf0c22261169279c Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:34:29 -0400 Subject: [PATCH 05/60] private method --- openhands/controller/agent_controller.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 0e921508f9c..11db85c33d7 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -133,7 +133,7 @@ async def update_state_after_step(self): # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset() self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics) - async def send_error_to_event_stream( + async def _send_error_to_event_stream( self, message: str, exception: Exception | None = None ): detail = str(exception) if exception is not None else '' @@ -157,7 +157,7 @@ async def start_step_loop(self): traceback.print_exc() logger.error(f'Error while running the agent: {e}') logger.error(traceback.format_exc()) - await self.send_error_to_event_stream( + await self._send_error_to_event_stream( 'There was an unexpected error while running the agent', exception=e ) await self.set_agent_state_to(AgentState.ERROR) @@ -433,7 +433,7 @@ async def _step(self) -> None: except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e: # report to the user # and send the underlying exception to the LLM for self-correction - await self.send_error_to_event_stream(str(e)) + await self._send_error_to_event_stream(str(e)) return if action.runnable: @@ -458,9 +458,9 @@ async def _step(self) -> None: logger.info(action, extra={'msg_type': 'ACTION'}) if self._is_stuck(): - # This need to go BEFORE send_error_to_event_stream to sync metrics + # This need to go BEFORE _send_error_to_event_stream to sync metrics await self.set_agent_state_to(AgentState.ERROR) - await self.send_error_to_event_stream('Agent got stuck in a loop') + await self._send_error_to_event_stream('Agent got stuck in a loop') async def _delegate_step(self): """Executes a single step of the delegate agent.""" @@ -479,7 +479,7 @@ async def _delegate_step(self): self.delegate = None self.delegateAction = None - await self.send_error_to_event_stream( + await self._send_error_to_event_stream( 'Delegator agent encountered an error' ) elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED): @@ -530,17 +530,17 @@ async def _handle_traffic_control( else: self.state.traffic_control_state = TrafficControlState.THROTTLING if self.headless_mode: - # This need to go BEFORE send_error_to_event_stream to sync metrics + # This need to go BEFORE _send_error_to_event_stream to sync metrics await self.set_agent_state_to(AgentState.ERROR) # set to ERROR state if running in headless mode # since user cannot resume on the web interface - await self.send_error_to_event_stream( + await self._send_error_to_event_stream( f'Agent reached maximum {limit_type} in headless mode, task stopped. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}' ) else: await self.set_agent_state_to(AgentState.PAUSED) - await self.send_error_to_event_stream( + await self._send_error_to_event_stream( f'Agent reached maximum {limit_type}, task paused. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' f'{TRAFFIC_CONTROL_REMINDER}' From a7ce34e2692e1cce0ebb61f4b4b77d148c481615 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:45:25 -0400 Subject: [PATCH 06/60] async_add_event --- openhands/controller/agent_controller.py | 53 +++++++++++++----------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 11db85c33d7..9550286c316 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -133,13 +133,18 @@ async def update_state_after_step(self): # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset() self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics) - async def _send_error_to_event_stream( - self, message: str, exception: Exception | None = None + async def _react_to_error( + self, + message: str, + exception: Exception | None = None, + new_state: AgentState | None = None, ): + if new_state is not None: + await self.set_agent_state_to(new_state) detail = str(exception) if exception is not None else '' if exception is not None and isinstance(exception, litellm.AuthenticationError): detail += '\nPlease check your credentials. Is your API key correct?' - self.event_stream.add_event( + await self.event_stream.async_add_event( ErrorObservation(f'{message}:{detail}'), EventSource.AGENT ) @@ -157,10 +162,11 @@ async def start_step_loop(self): traceback.print_exc() logger.error(f'Error while running the agent: {e}') logger.error(traceback.format_exc()) - await self._send_error_to_event_stream( - 'There was an unexpected error while running the agent', exception=e + await self._react_to_error( + 'There was an unexpected error while running the agent', + exception=e, + new_state=AgentState.ERROR, ) - await self.set_agent_state_to(AgentState.ERROR) break await asyncio.sleep(0.1) @@ -320,7 +326,9 @@ async def set_agent_state_to(self, new_state: AgentState): else: confirmation_state = ActionConfirmationStatus.REJECTED self._pending_action.confirmation_state = confirmation_state # type: ignore[attr-defined] - self.event_stream.add_event(self._pending_action, EventSource.AGENT) + await self.event_stream.async_add_event( + self._pending_action, EventSource.AGENT + ) self.state.agent_state = new_state self.event_stream.add_event( @@ -433,7 +441,7 @@ async def _step(self) -> None: except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e: # report to the user # and send the underlying exception to the LLM for self-correction - await self._send_error_to_event_stream(str(e)) + await self._react_to_error(str(e)) return if action.runnable: @@ -452,15 +460,15 @@ async def _step(self) -> None: == ActionConfirmationStatus.AWAITING_CONFIRMATION ): await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION) - self.event_stream.add_event(action, EventSource.AGENT) + await self.event_stream.async_add_event(action, EventSource.AGENT) await self.update_state_after_step() logger.info(action, extra={'msg_type': 'ACTION'}) if self._is_stuck(): - # This need to go BEFORE _send_error_to_event_stream to sync metrics - await self.set_agent_state_to(AgentState.ERROR) - await self._send_error_to_event_stream('Agent got stuck in a loop') + await self._react_to_error( + 'Agent got stuck in a loop', new_state=AgentState.ERROR + ) async def _delegate_step(self): """Executes a single step of the delegate agent.""" @@ -479,9 +487,7 @@ async def _delegate_step(self): self.delegate = None self.delegateAction = None - await self._send_error_to_event_stream( - 'Delegator agent encountered an error' - ) + await self._react_to_error('Delegator agent encountered an error') elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED): logger.info( f'[Agent Controller {self.id}] Delegate agent has finished execution' @@ -510,7 +516,7 @@ async def _delegate_step(self): # clean up delegate status self.delegate = None self.delegateAction = None - self.event_stream.add_event(obs, EventSource.AGENT) + await self.event_stream.async_add_event(obs, EventSource.AGENT) return async def _handle_traffic_control( @@ -530,20 +536,17 @@ async def _handle_traffic_control( else: self.state.traffic_control_state = TrafficControlState.THROTTLING if self.headless_mode: - # This need to go BEFORE _send_error_to_event_stream to sync metrics - await self.set_agent_state_to(AgentState.ERROR) - # set to ERROR state if running in headless mode - # since user cannot resume on the web interface - await self._send_error_to_event_stream( + await self._react_to_error( f'Agent reached maximum {limit_type} in headless mode, task stopped. ' - f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}' + f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}', + new_state=AgentState.ERROR, ) else: - await self.set_agent_state_to(AgentState.PAUSED) - await self._send_error_to_event_stream( + await self._react_to_error( f'Agent reached maximum {limit_type}, task paused. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' - f'{TRAFFIC_CONTROL_REMINDER}' + f'{TRAFFIC_CONTROL_REMINDER}', + new_state=AgentState.PAUSED, ) stop_step = True return stop_step From d8becc23e39ea2a718858fae6bd312b12e5a873d Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:52:00 -0400 Subject: [PATCH 07/60] remove unnecessary return --- openhands/controller/agent_controller.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 9550286c316..22e466a8800 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -35,7 +35,6 @@ from openhands.events.observation import ( AgentDelegateObservation, AgentStateChangedObservation, - CmdOutputObservation, ErrorObservation, FatalErrorObservation, Observation, @@ -245,9 +244,7 @@ async def _handle_observation(self, observation: Observation): await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT) return - if isinstance(observation, CmdOutputObservation): - return - elif isinstance(observation, AgentDelegateObservation): + if isinstance(observation, AgentDelegateObservation): self.state.history.on_event(observation) elif isinstance(observation, ErrorObservation): if self.state.agent_state == AgentState.ERROR: From d9811b00a6f941ccf2b9c29007ba1da56b1d541a Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:53:02 -0400 Subject: [PATCH 08/60] stop dropping events while pending_action is set --- openhands/controller/agent_controller.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 22e466a8800..ab1e7b84caf 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -216,15 +216,6 @@ async def _handle_observation(self, observation: Observation): Args: observation (observation): The observation to handle. """ - if ( - self._pending_action - and hasattr(self._pending_action, 'confirmation_state') - and self._pending_action.confirmation_state - == ActionConfirmationStatus.AWAITING_CONFIRMATION - ): - return - - # Make sure we print the observation in the same way as the LLM sees it observation_to_print = copy.deepcopy(observation) if len(observation_to_print.content) > self.agent.llm.config.max_message_chars: observation_to_print.content = truncate_content( @@ -232,7 +223,6 @@ async def _handle_observation(self, observation: Observation): ) logger.info(observation_to_print, extra={'msg_type': 'OBSERVATION'}) - # Merge with the metrics from the LLM - it will to synced to the controller's local metrics in update_state_after_step() if observation.llm_metrics is not None: self.agent.llm.metrics.merge(observation.llm_metrics) From 47a95f70fa7317a76de7810c5df5b90aac66acac Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Fri, 25 Oct 2024 23:58:47 -0400 Subject: [PATCH 09/60] remove FatalErrorObservation --- openhands/controller/agent_controller.py | 6 ++--- openhands/controller/state/state.py | 6 ++--- openhands/events/observation/__init__.py | 3 +-- openhands/events/observation/error.py | 15 +---------- openhands/runtime/action_execution_server.py | 3 +-- .../impl/eventstream/eventstream_runtime.py | 22 +++++++++------- .../runtime/impl/remote/remote_runtime.py | 26 +++++++++++-------- openhands/runtime/utils/bash.py | 9 ++++--- openhands/runtime/utils/edit.py | 11 ++++---- 9 files changed, 46 insertions(+), 55 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index ab1e7b84caf..59f6417c40c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -36,7 +36,6 @@ AgentDelegateObservation, AgentStateChangedObservation, ErrorObservation, - FatalErrorObservation, Observation, ) from openhands.events.serialization.event import truncate_content @@ -237,11 +236,10 @@ async def _handle_observation(self, observation: Observation): if isinstance(observation, AgentDelegateObservation): self.state.history.on_event(observation) elif isinstance(observation, ErrorObservation): + if observation.fatal: + await self.set_agent_state_to(AgentState.ERROR) if self.state.agent_state == AgentState.ERROR: self.state.metrics.merge(self.state.local_metrics) - elif isinstance(observation, FatalErrorObservation): - await self.set_agent_state_to(AgentState.ERROR) - self.state.metrics.merge(self.state.local_metrics) async def _handle_message_action(self, action: MessageAction): """Handles message actions from the event stream. diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index d204c9609ec..2ef4ac20350 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -11,7 +11,7 @@ MessageAction, ) from openhands.events.action.agent import AgentFinishAction -from openhands.events.observation import ErrorObservation, FatalErrorObservation +from openhands.events.observation import ErrorObservation from openhands.llm.metrics import Metrics from openhands.memory.history import ShortTermHistory from openhands.storage.files import FileStore @@ -156,9 +156,7 @@ def __setstate__(self, state): def get_last_error(self) -> str: for event in self.history.get_events(reverse=True): - if isinstance(event, ErrorObservation) or isinstance( - event, FatalErrorObservation - ): + if isinstance(event, ErrorObservation): return event.content return '' diff --git a/openhands/events/observation/__init__.py b/openhands/events/observation/__init__.py index a0fad86dfb4..28525b09aab 100644 --- a/openhands/events/observation/__init__.py +++ b/openhands/events/observation/__init__.py @@ -6,7 +6,7 @@ ) from openhands.events.observation.delegate import AgentDelegateObservation from openhands.events.observation.empty import NullObservation -from openhands.events.observation.error import ErrorObservation, FatalErrorObservation +from openhands.events.observation.error import ErrorObservation from openhands.events.observation.files import ( FileEditObservation, FileReadObservation, @@ -26,7 +26,6 @@ 'FileWriteObservation', 'FileEditObservation', 'ErrorObservation', - 'FatalErrorObservation', 'AgentStateChangedObservation', 'AgentDelegateObservation', 'SuccessObservation', diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py index cfbb291eb0f..d10fa2b8b75 100644 --- a/openhands/events/observation/error.py +++ b/openhands/events/observation/error.py @@ -12,6 +12,7 @@ class ErrorObservation(Observation): E.g., Linter error after editing a file. """ + fatal: bool = False observation: str = ObservationType.ERROR @property @@ -20,17 +21,3 @@ def message(self) -> str: def __str__(self) -> str: return f'**ErrorObservation**\n{self.content}' - - -@dataclass -class FatalErrorObservation(Observation): - """This data class represents a fatal error encountered by the agent. - - This is the type of error that LLM CANNOT recover from, and the agent controller should stop the execution and report the error to the user. - E.g., Remote runtime action execution failure: 503 Server Error: Service Unavailable for url OR 404 Not Found. - """ - - observation: str = ObservationType.ERROR - - def __str__(self) -> str: - return f'**FatalErrorObservation**\n{self.content}' diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py index 2da1372532d..b9f37b6dada 100644 --- a/openhands/runtime/action_execution_server.py +++ b/openhands/runtime/action_execution_server.py @@ -37,7 +37,6 @@ from openhands.events.observation import ( CmdOutputObservation, ErrorObservation, - FatalErrorObservation, FileReadObservation, FileWriteObservation, IPythonRunCellObservation, @@ -167,7 +166,7 @@ async def run_action(self, action) -> Observation: async def run( self, action: CmdRunAction - ) -> CmdOutputObservation | FatalErrorObservation: + ) -> CmdOutputObservation | ErrorObservation: return self.bash_session.run(action) async def run_ipython(self, action: IPythonRunCellAction) -> Observation: diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index 1cf40969225..60f9231a723 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -25,7 +25,7 @@ ) from openhands.events.action.action import Action from openhands.events.observation import ( - FatalErrorObservation, + ErrorObservation, NullObservation, Observation, UserRejectObservation, @@ -452,10 +452,13 @@ def run_action(self, action: Action) -> Observation: return NullObservation('') action_type = action.action # type: ignore[attr-defined] if action_type not in ACTION_TYPE_TO_CLASS: - return FatalErrorObservation(f'Action {action_type} does not exist.') + return ErrorObservation( + f'Action {action_type} does not exist.', fatal=True + ) if not hasattr(self, action_type): - return FatalErrorObservation( - f'Action {action_type} is not supported in the current runtime.' + return ErrorObservation( + f'Action {action_type} is not supported in the current runtime.', + fatal=True, ) if ( getattr(action, 'confirmation_state', None) @@ -486,17 +489,18 @@ def run_action(self, action: Action) -> Observation: logger.debug(f'response: {response}') error_message = response.text logger.error(f'Error from server: {error_message}') - obs = FatalErrorObservation( - f'Action execution failed: {error_message}' + obs = ErrorObservation( + f'Action execution failed: {error_message}', fatal=True ) except requests.Timeout: logger.error('No response received within the timeout period.') - obs = FatalErrorObservation( - f'Action execution timed out after {action.timeout} seconds.' + obs = ErrorObservation( + f'Action execution timed out after {action.timeout} seconds.', + fatal=True, ) except Exception as e: logger.error(f'Error during action execution: {e}') - obs = FatalErrorObservation(f'Action execution failed: {str(e)}') + obs = ErrorObservation(f'Action execution failed: {str(e)}', fatal=True) self._refresh_logs() return obs diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 8c25401af9c..f921d39e432 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -22,7 +22,7 @@ ) from openhands.events.action.action import Action from openhands.events.observation import ( - FatalErrorObservation, + ErrorObservation, NullObservation, Observation, ) @@ -351,12 +351,14 @@ def run_action(self, action: Action) -> Observation: return NullObservation('') action_type = action.action # type: ignore[attr-defined] if action_type not in ACTION_TYPE_TO_CLASS: - return FatalErrorObservation( - f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.' + return ErrorObservation( + f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.', + fatal=True, ) if not hasattr(self, action_type): - return FatalErrorObservation( - f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.' + return ErrorObservation( + f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.', + fatal=True, ) assert action.timeout is not None @@ -379,18 +381,20 @@ def run_action(self, action: Action) -> Observation: else: error_message = response.text logger.error(f'Error from server: {error_message}') - obs = FatalErrorObservation( - f'Action execution failed: {error_message}' + obs = ErrorObservation( + f'Action execution failed: {error_message}', fatal=True ) except Timeout: logger.error('No response received within the timeout period.') - obs = FatalErrorObservation( - f'[Runtime (ID={self.runtime_id})] Action execution timed out' + obs = ErrorObservation( + f'[Runtime (ID={self.runtime_id})] Action execution timed out', + fatal=True, ) except Exception as e: logger.error(f'Error during action execution: {e}') - obs = FatalErrorObservation( - f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}' + obs = ErrorObservation( + f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}', + fatal=True, ) return obs diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py index fba16787c6d..98e3ad9c09b 100644 --- a/openhands/runtime/utils/bash.py +++ b/openhands/runtime/utils/bash.py @@ -9,7 +9,7 @@ from openhands.events.event import EventSource from openhands.events.observation import ( CmdOutputObservation, - FatalErrorObservation, + ErrorObservation, ) SOFT_TIMEOUT_SECONDS = 5 @@ -275,7 +275,7 @@ def _continue_bash( output += '\r\n' + bash_prompt return output, exit_code - def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservation: + def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation: try: assert ( action.timeout is not None @@ -329,6 +329,7 @@ def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservat interpreter_details=python_interpreter, ) except UnicodeDecodeError as e: - return FatalErrorObservation( - f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}' + return ErrorObservation( + f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}', + fatal=True, ) diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py index 4ed5c0edafc..39b5cbbce65 100644 --- a/openhands/runtime/utils/edit.py +++ b/openhands/runtime/utils/edit.py @@ -13,7 +13,6 @@ ) from openhands.events.observation import ( ErrorObservation, - FatalErrorObservation, FileEditObservation, FileReadObservation, FileWriteObservation, @@ -214,8 +213,9 @@ def edit(self, action: FileEditAction) -> Observation: if isinstance(obs, ErrorObservation): return obs if not isinstance(obs, FileWriteObservation): - return FatalErrorObservation( - f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}' + return ErrorObservation( + f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}', + fatal=True, ) return FileEditObservation( content=get_diff('', action.content, action.path), @@ -225,8 +225,9 @@ def edit(self, action: FileEditAction) -> Observation: new_content=action.content, ) if not isinstance(obs, FileReadObservation): - return FatalErrorObservation( - f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}' + return ErrorObservation( + f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}', + fatal=True, ) original_file_content = obs.content From d46b71272d3650792ec3e975e81735b8e10977cc Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Sat, 26 Oct 2024 00:04:49 -0400 Subject: [PATCH 10/60] refactor react_to_error a bit more --- openhands/controller/agent_controller.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 59f6417c40c..087b029aa8f 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -134,16 +134,13 @@ async def update_state_after_step(self): async def _react_to_error( self, message: str, - exception: Exception | None = None, new_state: AgentState | None = None, ): if new_state is not None: + # it's important to set the state before adding the error event, so that metrics sync properly await self.set_agent_state_to(new_state) - detail = str(exception) if exception is not None else '' - if exception is not None and isinstance(exception, litellm.AuthenticationError): - detail += '\nPlease check your credentials. Is your API key correct?' await self.event_stream.async_add_event( - ErrorObservation(f'{message}:{detail}'), EventSource.AGENT + ErrorObservation(message), EventSource.AGENT ) async def start_step_loop(self): @@ -160,9 +157,13 @@ async def start_step_loop(self): traceback.print_exc() logger.error(f'Error while running the agent: {e}') logger.error(traceback.format_exc()) + detail = str(e) + if isinstance(e, litellm.AuthenticationError): + detail += ( + '\nPlease check your credentials. Is your API key correct?' + ) await self._react_to_error( - 'There was an unexpected error while running the agent', - exception=e, + detail, new_state=AgentState.ERROR, ) break From f9c283ea9bc084c3a50d2b688953886db288cdaf Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Sat, 26 Oct 2024 00:09:03 -0400 Subject: [PATCH 11/60] fix comments --- openhands/controller/agent_controller.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 087b029aa8f..a51620f1d2c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -425,9 +425,9 @@ async def _step(self) -> None: if action is None: raise LLMNoActionError('No action was returned') except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e: - # report to the user - # and send the underlying exception to the LLM for self-correction - await self._react_to_error(str(e)) + await self._react_to_error( + str(e), new_state=None + ) # don't change state, LLM can correct itself return if action.runnable: @@ -473,7 +473,9 @@ async def _delegate_step(self): self.delegate = None self.delegateAction = None - await self._react_to_error('Delegator agent encountered an error') + await self._react_to_error( + 'Delegator agent encountered an error', new_state=None + ) elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED): logger.info( f'[Agent Controller {self.id}] Delegate agent has finished execution' From 1c44fa6807ab21c98807f53838ddd82914c8dd8f Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Sat, 26 Oct 2024 00:45:06 -0400 Subject: [PATCH 12/60] fix test --- tests/unit/test_is_stuck.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py index 4a133075216..d42fc7392e4 100644 --- a/tests/unit/test_is_stuck.py +++ b/tests/unit/test_is_stuck.py @@ -440,9 +440,9 @@ def test_is_stuck_repeating_action_observation_pattern( read_observation_2._cause = read_action_2._id event_stream.add_event(read_observation_2, EventSource.USER) - # one more message to break the pattern - message_null_observation = NullObservation(content='') + message_action = MessageAction(content='Come on', wait_for_response=False) event_stream.add_event(message_action, EventSource.USER) + message_null_observation = NullObservation(content='') event_stream.add_event(message_null_observation, EventSource.USER) cmd_action_3 = CmdRunAction(command='ls') From 3f67ed38d0d059da0348011733af34a72b2b8b3a Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Sat, 26 Oct 2024 01:01:30 -0400 Subject: [PATCH 13/60] fix test --- tests/unit/test_agent_controller.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index fbaa225c907..1aee80666c6 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -87,7 +87,7 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream) @pytest.mark.asyncio -async def test_report_error(mock_agent, mock_event_stream): +async def test__react_to_error(mock_agent, mock_event_stream): controller = AgentController( agent=mock_agent, event_stream=mock_event_stream, @@ -97,7 +97,7 @@ async def test_report_error(mock_agent, mock_event_stream): headless_mode=True, ) error_message = 'Test error' - await controller.report_error(error_message) + await controller._react_to_error(error_message) assert controller.state.last_error == error_message controller.event_stream.add_event.assert_called_once() await controller.close() @@ -114,12 +114,12 @@ async def test_step_with_exception(mock_agent, mock_event_stream): headless_mode=True, ) controller.state.agent_state = AgentState.RUNNING - controller.report_error = AsyncMock() + controller._react_to_error = AsyncMock() controller.agent.step.side_effect = LLMMalformedActionError('Malformed action') await controller._step() - # Verify that report_error was called with the correct error message - controller.report_error.assert_called_once_with('Malformed action') + # Verify that _react_to_error was called with the correct error message + controller._react_to_error.assert_called_once_with('Malformed action') await controller.close() From 6aff3818c60920d5ffa479b115088f3cceed25ba Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sun, 27 Oct 2024 04:59:55 +0100 Subject: [PATCH 14/60] Update openhands/controller/state/state.py Co-authored-by: Xingyao Wang --- openhands/controller/state/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index 2ef4ac20350..0a84d821a6d 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -157,7 +157,7 @@ def __setstate__(self, state): def get_last_error(self) -> str: for event in self.history.get_events(reverse=True): if isinstance(event, ErrorObservation): - return event.content + return event.content if not event.fatal else 'There was a fatal error during agent execution: ' + event.content return '' def get_current_user_intent(self): From b278d45f0829125c5d1e608c8bb6f15b4a694d1b Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 29 Oct 2024 11:22:18 +0100 Subject: [PATCH 15/60] more invisible conflicts --- tests/unit/test_agent_controller.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 6d0fe07a748..e6de80d274b 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -14,7 +14,6 @@ from openhands.events.action import ChangeAgentStateAction, CmdRunAction, MessageAction from openhands.events.observation import ( ErrorObservation, - FatalErrorObservation, ) from openhands.events.serialization import event_to_dict from openhands.llm import LLM @@ -148,7 +147,7 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream): agent.llm.metrics = Metrics() agent.llm.config = config.get_llm_config() - fatal_error_obs = FatalErrorObservation('Fatal error detected') + fatal_error_obs = ErrorObservation('Fatal error detected', fatal=True) fatal_error_obs._cause = event.id runtime = MagicMock(spec=Runtime) @@ -175,8 +174,8 @@ async def on_event(event: Event): # in side run_controller (since the while loop + sleep no longer loop) assert state.agent_state == AgentState.STOPPED assert ( - state.last_error - == 'There was a fatal error during agent execution: **FatalErrorObservation**\nFatal error detected' + state.get_last_error() + == 'There was a fatal error during agent execution: **ErrorObservation**\nFatal error detected' ) assert len(list(event_stream.get_events())) == 5 From d19f998270702147ea795820a31383a082c8adbd Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 13:10:04 -0400 Subject: [PATCH 16/60] remove fatal observations --- openhands/controller/agent_controller.py | 2 -- openhands/controller/state/state.py | 7 +------ openhands/events/observation/error.py | 1 - openhands/runtime/impl/eventstream/eventstream_runtime.py | 8 +++----- openhands/runtime/impl/remote/remote_runtime.py | 8 +------- openhands/runtime/utils/bash.py | 1 - openhands/runtime/utils/edit.py | 6 ++---- 7 files changed, 7 insertions(+), 26 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index b6991c9ea92..078d145e78e 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -246,8 +246,6 @@ async def _handle_observation(self, observation: Observation): if isinstance(observation, AgentDelegateObservation): self.state.history.on_event(observation) elif isinstance(observation, ErrorObservation): - if observation.fatal: - await self.set_agent_state_to(AgentState.ERROR) if self.state.agent_state == AgentState.ERROR: self.state.metrics.merge(self.state.local_metrics) diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index 50ba265c6e3..db3b3638a8c 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -157,12 +157,7 @@ def __setstate__(self, state): def get_last_error(self) -> str: for event in self.history.get_events(reverse=True): if isinstance(event, ErrorObservation): - return ( - event.content - if not event.fatal - else 'There was a fatal error during agent execution: ' - + event.content - ) + return event.content return '' def get_current_user_intent(self): diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py index d10fa2b8b75..c6139e86c0b 100644 --- a/openhands/events/observation/error.py +++ b/openhands/events/observation/error.py @@ -12,7 +12,6 @@ class ErrorObservation(Observation): E.g., Linter error after editing a file. """ - fatal: bool = False observation: str = ObservationType.ERROR @property diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index be4c415ec05..aac4c9e5770 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -466,12 +466,11 @@ def run_action(self, action: Action) -> Observation: action_type = action.action # type: ignore[attr-defined] if action_type not in ACTION_TYPE_TO_CLASS: return ErrorObservation( - f'Action {action_type} does not exist.', fatal=True + f'Action {action_type} does not exist.', ) if not hasattr(self, action_type): return ErrorObservation( f'Action {action_type} is not supported in the current runtime.', - fatal=True, ) if ( getattr(action, 'confirmation_state', None) @@ -503,17 +502,16 @@ def run_action(self, action: Action) -> Observation: error_message = response.text self.log('error', f'Error from server: {error_message}') obs = ErrorObservation( - f'Action execution failed: {error_message}', fatal=True + f'Action execution failed: {error_message}', ) except requests.Timeout: self.log('error', 'No response received within the timeout period.') obs = ErrorObservation( f'Action execution timed out after {action.timeout} seconds.', - fatal=True, ) except Exception as e: self.log('error', f'Error during action execution: {e}') - obs = ErrorObservation(f'Action execution failed: {str(e)}', fatal=True) + obs = ErrorObservation(f'Action execution failed: {str(e)}') self._refresh_logs() return obs diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 4d6e243be88..1169f7ee7a6 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -363,12 +363,10 @@ def run_action(self, action: Action) -> Observation: if action_type not in ACTION_TYPE_TO_CLASS: return ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.', - fatal=True, ) if not hasattr(self, action_type): return ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.', - fatal=True, ) assert action.timeout is not None @@ -391,20 +389,16 @@ def run_action(self, action: Action) -> Observation: else: error_message = response.text self.log('error', f'Error from server: {error_message}') - obs = ErrorObservation( - f'Action execution failed: {error_message}', fatal=True - ) + obs = ErrorObservation(f'Action execution failed: {error_message}') except Timeout: self.log('error', 'No response received within the timeout period.') obs = ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action execution timed out', - fatal=True, ) except Exception as e: self.log('error', f'Error during action execution: {e}') obs = ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}', - fatal=True, ) return obs diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py index 98e3ad9c09b..a5019315a03 100644 --- a/openhands/runtime/utils/bash.py +++ b/openhands/runtime/utils/bash.py @@ -331,5 +331,4 @@ def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation: except UnicodeDecodeError as e: return ErrorObservation( f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}', - fatal=True, ) diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py index 478da2defc4..fe08f87b2e4 100644 --- a/openhands/runtime/utils/edit.py +++ b/openhands/runtime/utils/edit.py @@ -214,8 +214,7 @@ def edit(self, action: FileEditAction) -> Observation: return obs if not isinstance(obs, FileWriteObservation): return ErrorObservation( - f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}', - fatal=True, + f'Expected FileWriteObservation, got {type(obs)}: {str(obs)}', ) return FileEditObservation( content=get_diff('', action.content, action.path), @@ -226,8 +225,7 @@ def edit(self, action: FileEditAction) -> Observation: ) if not isinstance(obs, FileReadObservation): return ErrorObservation( - f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}', - fatal=True, + f'Expected FileReadObservation, got {type(obs)}: {str(obs)}', ) original_file_content = obs.content From 4745cb6f135f1461407c4dcfd760f7570166a2a2 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 13:15:26 -0400 Subject: [PATCH 17/60] more refactoring --- openhands/controller/agent_controller.py | 38 ++++++++++-------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 078d145e78e..8961f5a4b4e 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -3,8 +3,6 @@ import traceback from typing import Type -import litellm - from openhands.controller.agent import Agent from openhands.controller.state.state import State, TrafficControlState from openhands.controller.stuck import StuckDetector @@ -140,17 +138,15 @@ async def update_state_after_step(self): # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset() self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics) - async def _react_to_error( + async def _react_to_exception( self, - message: str, + e: Exception, new_state: AgentState | None = None, ): if new_state is not None: # it's important to set the state before adding the error event, so that metrics sync properly await self.set_agent_state_to(new_state) - await self.event_stream.async_add_event( - ErrorObservation(message), EventSource.AGENT - ) + raise e async def start_step_loop(self): """The main loop for the agent's step-by-step execution.""" @@ -165,14 +161,8 @@ async def start_step_loop(self): except Exception as e: traceback.print_exc() self.log('error', f'Error while running the agent: {e}') - self.log('error', traceback.format_exc()) - detail = str(e) - if isinstance(e, litellm.AuthenticationError): - detail += ( - '\nPlease check your credentials. Is your API key correct?' - ) - await self._react_to_error( - detail, + await self._react_to_exception( + exception=e, new_state=AgentState.ERROR, ) break @@ -437,9 +427,13 @@ async def _step(self) -> None: if action is None: raise LLMNoActionError('No action was returned') except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e: - await self._react_to_error( - str(e), new_state=None - ) # don't change state, LLM can correct itself + await self.event_stream.async_add_event( + ErrorObservation( + content=str(e), + cause=action.id, + agent_state=self.get_agent_state(), + ) + ) return if action.runnable: @@ -465,7 +459,7 @@ async def _step(self) -> None: self.log('debug', str(action), extra={'msg_type': 'ACTION'}) if self._is_stuck(): - await self._react_to_error( + await self._react_to_exception( 'Agent got stuck in a loop', new_state=AgentState.ERROR ) @@ -486,7 +480,7 @@ async def _delegate_step(self): self.delegate = None self.delegateAction = None - await self._react_to_error( + await self._react_to_exception( 'Delegator agent encountered an error', new_state=None ) elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED): @@ -537,13 +531,13 @@ async def _handle_traffic_control( else: self.state.traffic_control_state = TrafficControlState.THROTTLING if self.headless_mode: - await self._react_to_error( + await self._react_to_exception( f'Agent reached maximum {limit_type} in headless mode, task stopped. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}', new_state=AgentState.ERROR, ) else: - await self._react_to_error( + await self._react_to_exception( f'Agent reached maximum {limit_type}, task paused. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' f'{TRAFFIC_CONTROL_REMINDER}', From d7fe86d4f31f6d737192e795ab664030b6df116d Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 13:30:48 -0400 Subject: [PATCH 18/60] new react_to_exception --- openhands/controller/agent_controller.py | 35 +++++++++++------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 8961f5a4b4e..4cedc1159c9 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -141,7 +141,7 @@ async def update_state_after_step(self): async def _react_to_exception( self, e: Exception, - new_state: AgentState | None = None, + new_state: AgentState = AgentState.ERROR, ): if new_state is not None: # it's important to set the state before adding the error event, so that metrics sync properly @@ -161,10 +161,7 @@ async def start_step_loop(self): except Exception as e: traceback.print_exc() self.log('error', f'Error while running the agent: {e}') - await self._react_to_exception( - exception=e, - new_state=AgentState.ERROR, - ) + await self._react_to_exception(e) break await asyncio.sleep(0.1) @@ -430,9 +427,8 @@ async def _step(self) -> None: await self.event_stream.async_add_event( ErrorObservation( content=str(e), - cause=action.id, - agent_state=self.get_agent_state(), - ) + ), + EventSource.AGENT, ) return @@ -459,9 +455,7 @@ async def _step(self) -> None: self.log('debug', str(action), extra={'msg_type': 'ACTION'}) if self._is_stuck(): - await self._react_to_exception( - 'Agent got stuck in a loop', new_state=AgentState.ERROR - ) + await self._react_to_exception(RuntimeError('Agent got stuck in a loop')) async def _delegate_step(self): """Executes a single step of the delegate agent.""" @@ -480,8 +474,9 @@ async def _delegate_step(self): self.delegate = None self.delegateAction = None - await self._react_to_exception( - 'Delegator agent encountered an error', new_state=None + await self.event_stream.async_add_event( + ErrorObservation('Delegate agent encountered an error'), + EventSource.AGENT, ) elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED): self.log('debug', 'Delegate agent has finished execution') @@ -531,16 +526,18 @@ async def _handle_traffic_control( else: self.state.traffic_control_state = TrafficControlState.THROTTLING if self.headless_mode: - await self._react_to_exception( - f'Agent reached maximum {limit_type} in headless mode, task stopped. ' - f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}', - new_state=AgentState.ERROR, + e = RuntimeError( + f'Agent reached maximum {limit_type} in headless mode. ' + f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}' ) + await self._react_to_exception(e) else: - await self._react_to_exception( + e = RuntimeError( f'Agent reached maximum {limit_type}, task paused. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' - f'{TRAFFIC_CONTROL_REMINDER}', + ) + await self._react_to_exception( + e, new_state=AgentState.PAUSED, ) stop_step = True From d240775fb0999cb28bce2d70f293aa205ba23d35 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 14:00:33 -0400 Subject: [PATCH 19/60] change up status message plumbing --- frontend/src/services/actions.ts | 10 ++++++---- frontend/src/types/Message.tsx | 10 ++++------ openhands/controller/agent_controller.py | 12 +++++++++-- openhands/runtime/base.py | 19 ++++++++++++++++-- openhands/runtime/impl/e2b/e2b_runtime.py | 4 ++-- .../impl/eventstream/eventstream_runtime.py | 15 +++++--------- openhands/runtime/impl/modal/modal_runtime.py | 6 +++--- .../runtime/impl/remote/remote_runtime.py | 9 ++------- openhands/server/session/agent_session.py | 18 ++++++++--------- openhands/server/session/session.py | 20 +++++++++++++------ 10 files changed, 72 insertions(+), 51 deletions(-) diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index 46b6aad8513..9d215dcb146 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -119,11 +119,11 @@ export function handleActionMessage(message: ActionMessage) { } export function handleStatusMessage(message: StatusMessage) { - const msg = message.status == null ? "" : message.status.trim(); + const msg_id = message.id; store.dispatch( setCurStatusMessage({ ...message, - status: msg, + status: msg_id, }), ); } @@ -139,9 +139,11 @@ export function handleAssistantMessage(data: string | SocketMessage) { if ("action" in socketMessage) { handleActionMessage(socketMessage); - } else if ("status" in socketMessage) { + } else if ("observation" in socketMessage) { + handleObservationMessage(socketMessage); + } else if ("status_update" in socketMessage) { handleStatusMessage(socketMessage); } else { - handleObservationMessage(socketMessage); + console.error("Unknown message type", socketMessage); } } diff --git a/frontend/src/types/Message.tsx b/frontend/src/types/Message.tsx index d4d365d5904..85b1d970641 100644 --- a/frontend/src/types/Message.tsx +++ b/frontend/src/types/Message.tsx @@ -33,10 +33,8 @@ export interface ObservationMessage { } export interface StatusMessage { - // TODO not implemented yet - // Whether the status is an error, default is false - is_error: boolean; - - // A status message to display to the user - status: string; + status_update: true; + type: string; + id: string; + message: string; } diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 4cedc1159c9..4e430c05ee9 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -1,7 +1,9 @@ import asyncio import copy import traceback -from typing import Type +from typing import Callable, Optional, Type + +import litellm from openhands.controller.agent import Agent from openhands.controller.state.state import State, TrafficControlState @@ -73,6 +75,7 @@ def __init__( initial_state: State | None = None, is_delegate: bool = False, headless_mode: bool = True, + status_callback: Optional[Callable] = None, ): """Initializes a new instance of the AgentController class. @@ -115,6 +118,7 @@ def __init__( # stuck helper self._stuck_detector = StuckDetector(self.state) + self._status_callback = status_callback async def close(self): """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream.""" @@ -146,7 +150,11 @@ async def _react_to_exception( if new_state is not None: # it's important to set the state before adding the error event, so that metrics sync properly await self.set_agent_state_to(new_state) - raise e + if self._status_callback is not None: + err_id = '' + if isinstance(e, litellm.AuthenticationError): + err_id = 'llm_authentication_error' + await self._status_callback('error', err_id, str(e)) async def start_step_loop(self): """The main loop for the agent's step-by-step execution.""" diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index 474ba741a47..1b59376924a 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -31,6 +31,14 @@ from openhands.runtime.utils.edit import FileEditRuntimeMixin from openhands.utils.async_utils import call_sync_from_async +STATUS_MESSAGES = { + 'STATUS$STARTING_RUNTIME': 'Starting runtime...', + 'STATUS$STARTING_CONTAINER': 'Starting container...', + 'STATUS$PREPARING_CONTAINER': 'Preparing container...', + 'STATUS$CONTAINER_STARTED': 'Container started.', + 'STATUS$WAITING_FOR_CLIENT': 'Waiting for client...', +} + def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]: ret = {} @@ -62,14 +70,14 @@ def __init__( sid: str = 'default', plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, - status_message_callback: Callable | None = None, + status_callback: Callable | None = None, attach_to_existing: bool = False, ): self.sid = sid self.event_stream = event_stream self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event) self.plugins = plugins if plugins is not None and len(plugins) > 0 else [] - self.status_message_callback = status_message_callback + self.status_callback = status_callback self.attach_to_existing = attach_to_existing self.config = copy.deepcopy(config) @@ -97,6 +105,13 @@ def log(self, level: str, message: str) -> None: message = f'[runtime {self.sid}] {message}' getattr(logger, level)(message) + def send_status_message(self, message_id: str): + """Sends a status message if the callback function was provided.""" + print('SEND STATUS', self.status_callback) + if self.status_callback: + msg = STATUS_MESSAGES.get(message_id, '') + self.status_callback('status', message_id, msg) + # ==================================================================== def add_env_vars(self, env_vars: dict[str, str]) -> None: diff --git a/openhands/runtime/impl/e2b/e2b_runtime.py b/openhands/runtime/impl/e2b/e2b_runtime.py index b5233574f0b..7c9c297f424 100644 --- a/openhands/runtime/impl/e2b/e2b_runtime.py +++ b/openhands/runtime/impl/e2b/e2b_runtime.py @@ -27,14 +27,14 @@ def __init__( sid: str = 'default', plugins: list[PluginRequirement] | None = None, sandbox: E2BSandbox | None = None, - status_message_callback: Optional[Callable] = None, + status_callback: Optional[Callable] = None, ): super().__init__( config, event_stream, sid, plugins, - status_message_callback=status_message_callback, + status_callback=status_callback, ) if sandbox is None: self.sandbox = E2BSandbox() diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index aac4c9e5770..17814cf67ff 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -123,7 +123,7 @@ def init_base_runtime( sid: str = 'default', plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, - status_message_callback: Callable | None = None, + status_callback: Callable | None = None, attach_to_existing: bool = False, ): super().__init__( @@ -132,7 +132,7 @@ def init_base_runtime( sid, plugins, env_vars, - status_message_callback, + status_callback, attach_to_existing, ) @@ -143,7 +143,7 @@ def __init__( sid: str = 'default', plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, - status_message_callback: Callable | None = None, + status_callback: Callable | None = None, attach_to_existing: bool = False, ): self.config = config @@ -151,7 +151,7 @@ def __init__( self._container_port = 30001 # initial dummy value self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}' self.session = requests.Session() - self.status_message_callback = status_message_callback + self.status_callback = status_callback self.docker_client: docker.DockerClient = self._init_docker_client() self.base_container_image = self.config.sandbox.base_container_image @@ -181,7 +181,7 @@ def __init__( sid, plugins, env_vars, - status_message_callback, + status_callback, attach_to_existing, ) @@ -664,8 +664,3 @@ def _find_available_port(self, max_attempts=5): return port # If no port is found after max_attempts, return the last tried port return port - - def send_status_message(self, message: str): - """Sends a status message if the callback function was provided.""" - if self.status_message_callback: - self.status_message_callback(message) diff --git a/openhands/runtime/impl/modal/modal_runtime.py b/openhands/runtime/impl/modal/modal_runtime.py index 3a484c43e69..0e598a437f4 100644 --- a/openhands/runtime/impl/modal/modal_runtime.py +++ b/openhands/runtime/impl/modal/modal_runtime.py @@ -75,7 +75,7 @@ def __init__( sid: str = 'default', plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, - status_message_callback: Callable | None = None, + status_callback: Callable | None = None, attach_to_existing: bool = False, ): assert config.modal_api_token_id, 'Modal API token id is required' @@ -102,7 +102,7 @@ def __init__( self.container_port = 3000 self.session = requests.Session() - self.status_message_callback = status_message_callback + self.status_callback = status_callback self.base_container_image_id = self.config.sandbox.base_container_image self.runtime_container_image_id = self.config.sandbox.runtime_container_image self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time @@ -122,7 +122,7 @@ def __init__( sid, plugins, env_vars, - status_message_callback, + status_callback, attach_to_existing, ) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 1169f7ee7a6..a628db49654 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -51,7 +51,7 @@ def __init__( sid: str = 'default', plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, - status_message_callback: Optional[Callable] = None, + status_callback: Optional[Callable] = None, attach_to_existing: bool = False, ): super().__init__( @@ -60,7 +60,7 @@ def __init__( sid, plugins, env_vars, - status_message_callback, + status_callback, attach_to_existing, ) @@ -539,8 +539,3 @@ def copy_from(self, path: str) -> bytes: raise RuntimeError( f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}' ) - - def send_status_message(self, message: str): - """Sends a status message if the callback function was provided.""" - if self.status_message_callback: - self.status_message_callback(message) diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index 41ee9688916..b7fd08ba823 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -32,7 +32,12 @@ class AgentSession: _closed: bool = False loop: asyncio.AbstractEventLoop | None = None - def __init__(self, sid: str, file_store: FileStore): + def __init__( + self, + sid: str, + file_store: FileStore, + status_callback: Optional[Callable] = None, + ): """Initializes a new instance of the Session class Parameters: @@ -43,6 +48,7 @@ def __init__(self, sid: str, file_store: FileStore): self.sid = sid self.event_stream = EventStream(sid, file_store) self.file_store = file_store + self._status_callback = status_callback async def start( self, @@ -53,7 +59,6 @@ async def start( max_budget_per_task: float | None = None, agent_to_llm_config: dict[str, LLMConfig] | None = None, agent_configs: dict[str, AgentConfig] | None = None, - status_message_callback: Optional[Callable] = None, ): """Starts the Agent session Parameters: @@ -80,7 +85,6 @@ async def start( max_budget_per_task, agent_to_llm_config, agent_configs, - status_message_callback, ) def _start_thread(self, *args): @@ -99,7 +103,6 @@ async def _start( max_budget_per_task: float | None = None, agent_to_llm_config: dict[str, LLMConfig] | None = None, agent_configs: dict[str, AgentConfig] | None = None, - status_message_callback: Optional[Callable] = None, ): self.loop = asyncio.get_running_loop() self._create_security_analyzer(config.security.security_analyzer) @@ -107,7 +110,6 @@ async def _start( runtime_name=runtime_name, config=config, agent=agent, - status_message_callback=status_message_callback, ) self._create_controller( agent, @@ -161,7 +163,6 @@ async def _create_runtime( runtime_name: str, config: AppConfig, agent: Agent, - status_message_callback: Optional[Callable] = None, ): """Creates a runtime instance @@ -181,7 +182,7 @@ async def _create_runtime( event_stream=self.event_stream, sid=self.sid, plugins=agent.sandbox_plugins, - status_message_callback=status_message_callback, + status_callback=self._status_callback, ) try: @@ -251,9 +252,8 @@ def _create_controller( agent_to_llm_config=agent_to_llm_config, agent_configs=agent_configs, confirmation_mode=confirmation_mode, - # AgentSession is designed to communicate with the frontend, so we don't want to - # run the agent in headless mode. headless_mode=False, + status_callback=self._status_callback, ) try: agent_state = State.restore_from_session(self.sid, self.file_store) diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 4e6119a1856..b83f09b2493 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -42,7 +42,9 @@ def __init__( self.sid = sid self.websocket = ws self.last_active_ts = int(time.time()) - self.agent_session = AgentSession(sid, file_store) + self.agent_session = AgentSession( + sid, file_store, status_callback=self.queue_status_message + ) self.agent_session.event_stream.subscribe( EventStreamSubscriber.SERVER, self.on_event ) @@ -116,7 +118,6 @@ async def _initialize_agent(self, data: dict): max_budget_per_task=self.config.max_budget_per_task, agent_to_llm_config=self.config.get_agent_to_llm_config_map(), agent_configs=self.config.get_agent_configs(), - status_message_callback=self.queue_status_message, ) except Exception as e: logger.exception(f'Error creating controller: {e}') @@ -177,6 +178,7 @@ async def send(self, data: dict[str, object]) -> bool: try: if self.websocket is None or not self.is_alive: return False + print('SEND JSON', data) await self.websocket.send_json(data) await asyncio.sleep(0.001) # This flushes the data to the client self.last_active_ts = int(time.time()) @@ -196,9 +198,12 @@ async def send_message(self, message: str) -> bool: """Sends a message to the client.""" return await self.send({'message': message}) - async def send_status_message(self, message: str) -> bool: + async def send_status_message(self, msg_type: str, id: str, message: str) -> bool: """Sends a status message to the client.""" - return await self.send({'status': message}) + print('SEND STATUS BACK TO CLIENT') + return await self.send( + {'status_update': True, 'type': msg_type, 'id': id, 'message': message} + ) def update_connection(self, ws: WebSocket): self.websocket = ws @@ -212,7 +217,10 @@ def load_from_data(self, data: dict) -> bool: self.is_alive = data.get('is_alive', False) return True - def queue_status_message(self, message: str): + def queue_status_message(self, msg_type: str, id: str, message: str): """Queues a status message to be sent asynchronously.""" # Ensure the coroutine runs in the main event loop - asyncio.run_coroutine_threadsafe(self.send_status_message(message), self.loop) + print('QUEUE STATUS MESSAGE') + asyncio.run_coroutine_threadsafe( + self.send_status_message(msg_type, id, message), self.loop + ) From d9f834e8c2785910d7dcd829a208e77450c8580a Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 14:47:16 -0400 Subject: [PATCH 20/60] logspam --- openhands/runtime/base.py | 1 - openhands/server/session/session.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index 1b59376924a..d3b236131d1 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -107,7 +107,6 @@ def log(self, level: str, message: str) -> None: def send_status_message(self, message_id: str): """Sends a status message if the callback function was provided.""" - print('SEND STATUS', self.status_callback) if self.status_callback: msg = STATUS_MESSAGES.get(message_id, '') self.status_callback('status', message_id, msg) diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index b83f09b2493..2784cac5f0d 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -178,7 +178,6 @@ async def send(self, data: dict[str, object]) -> bool: try: if self.websocket is None or not self.is_alive: return False - print('SEND JSON', data) await self.websocket.send_json(data) await asyncio.sleep(0.001) # This flushes the data to the client self.last_active_ts = int(time.time()) @@ -200,7 +199,6 @@ async def send_message(self, message: str) -> bool: async def send_status_message(self, msg_type: str, id: str, message: str) -> bool: """Sends a status message to the client.""" - print('SEND STATUS BACK TO CLIENT') return await self.send( {'status_update': True, 'type': msg_type, 'id': id, 'message': message} ) @@ -220,7 +218,6 @@ def load_from_data(self, data: dict) -> bool: def queue_status_message(self, msg_type: str, id: str, message: str): """Queues a status message to be sent asynchronously.""" # Ensure the coroutine runs in the main event loop - print('QUEUE STATUS MESSAGE') asyncio.run_coroutine_threadsafe( self.send_status_message(msg_type, id, message), self.loop ) From 99486f4ae7ac35ca9b326dfa188998b015c9a36a Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 14:52:26 -0400 Subject: [PATCH 21/60] fix await --- openhands/controller/agent_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 4e430c05ee9..568877a6ffa 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -154,7 +154,7 @@ async def _react_to_exception( err_id = '' if isinstance(e, litellm.AuthenticationError): err_id = 'llm_authentication_error' - await self._status_callback('error', err_id, str(e)) + self._status_callback('error', err_id, str(e)) async def start_step_loop(self): """The main loop for the agent's step-by-step execution.""" From ed7f4ee34d073b0d3ba2abdd02695d9c77f378c7 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 15:25:30 -0400 Subject: [PATCH 22/60] better error toasts on frontend --- frontend/src/components/AgentStatusBar.tsx | 28 +++++++++++++++------- frontend/src/services/actions.ts | 2 -- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx index c337a838f32..be6eba4448a 100644 --- a/frontend/src/components/AgentStatusBar.tsx +++ b/frontend/src/components/AgentStatusBar.tsx @@ -5,6 +5,7 @@ import { I18nKey } from "#/i18n/declaration"; import { RootState } from "#/store"; import AgentState from "#/types/AgentState"; import beep from "#/utils/beep"; +import toast from "react-hot-toast"; enum IndicatorColor { BLUE = "bg-blue-500", @@ -16,7 +17,7 @@ enum IndicatorColor { } function AgentStatusBar() { - const { t } = useTranslation(); + const { t, i18n } = useTranslation(); const { curAgentState } = useSelector((state: RootState) => state.agent); const { curStatusMessage } = useSelector((state: RootState) => state.status); @@ -94,15 +95,26 @@ function AgentStatusBar() { const [statusMessage, setStatusMessage] = React.useState(""); React.useEffect(() => { - if (curAgentState === AgentState.LOADING) { - const trimmedCustomMessage = curStatusMessage.status.trim(); - if (trimmedCustomMessage) { - setStatusMessage(t(trimmedCustomMessage)); - return; + let message = curStatusMessage.message || ''; + if (curStatusMessage?.id) { + const id = curStatusMessage.id.trim(); + console.log('status message id', id); + if (i18n.exists(id)) { + console.log('exists'); + message = t(curStatusMessage.id.trim()) || message; } } - setStatusMessage(AgentStatusMap[curAgentState].message); - }, [curAgentState, curStatusMessage.status]); + if (curStatusMessage?.type === "error") { + console.log('error', message); + toast.error(message); + return; + } + if (curAgentState === AgentState.LOADING && message.trim()) { + setStatusMessage(message); + } else { + setStatusMessage(AgentStatusMap[curAgentState].message); + } + }, [curAgentState, curStatusMessage.id]); return (
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index 9d215dcb146..620f4513113 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -119,11 +119,9 @@ export function handleActionMessage(message: ActionMessage) { } export function handleStatusMessage(message: StatusMessage) { - const msg_id = message.id; store.dispatch( setCurStatusMessage({ ...message, - status: msg_id, }), ); } From 2e342b870e2dcbf531010bcd73e4014ef1bdbf5e Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 15:32:51 -0400 Subject: [PATCH 23/60] better error plumbing --- frontend/src/components/AgentStatusBar.tsx | 7 ++++++- openhands/controller/agent_controller.py | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx index be6eba4448a..351f4924bba 100644 --- a/frontend/src/components/AgentStatusBar.tsx +++ b/frontend/src/components/AgentStatusBar.tsx @@ -95,6 +95,7 @@ function AgentStatusBar() { const [statusMessage, setStatusMessage] = React.useState(""); React.useEffect(() => { + console.log('cur status message', curStatusMessage); let message = curStatusMessage.message || ''; if (curStatusMessage?.id) { const id = curStatusMessage.id.trim(); @@ -114,7 +115,11 @@ function AgentStatusBar() { } else { setStatusMessage(AgentStatusMap[curAgentState].message); } - }, [curAgentState, curStatusMessage.id]); + }, [curStatusMessage.id]); + + React.useEffect(() => { + setStatusMessage(AgentStatusMap[curAgentState].message); + }, [curAgentState]); return (
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 568877a6ffa..7a02166deb0 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -322,9 +322,10 @@ async def set_agent_state_to(self, new_state: AgentState): ) self.state.agent_state = new_state - self.event_stream.add_event( + await self.event_stream.async_add_event( AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT ) + print('sent state change obs') if new_state == AgentState.INIT and self.state.resume_state: await self.set_agent_state_to(self.state.resume_state) From 154b838b442fdcbcd839a80d0fb63205d024880f Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 15:33:04 -0400 Subject: [PATCH 24/60] logspam --- frontend/src/components/AgentStatusBar.tsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx index 351f4924bba..8aeb567f096 100644 --- a/frontend/src/components/AgentStatusBar.tsx +++ b/frontend/src/components/AgentStatusBar.tsx @@ -95,18 +95,14 @@ function AgentStatusBar() { const [statusMessage, setStatusMessage] = React.useState(""); React.useEffect(() => { - console.log('cur status message', curStatusMessage); let message = curStatusMessage.message || ''; if (curStatusMessage?.id) { const id = curStatusMessage.id.trim(); - console.log('status message id', id); if (i18n.exists(id)) { - console.log('exists'); message = t(curStatusMessage.id.trim()) || message; } } if (curStatusMessage?.type === "error") { - console.log('error', message); toast.error(message); return; } From 45ba7cf1139ded62425acea7ccafed3e8888f237 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 29 Oct 2024 16:36:19 -0400 Subject: [PATCH 25/60] better restarting --- openhands/agenthub/codeact_agent/codeact_agent.py | 2 +- openhands/controller/agent_controller.py | 2 +- openhands/server/session/agent_session.py | 1 + openhands/server/session/session.py | 12 ++++++------ 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 997d424c515..9dca9baf235 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -110,7 +110,7 @@ def __init__( codeact_enable_jupyter=self.config.codeact_enable_jupyter, codeact_enable_llm_editor=self.config.codeact_enable_llm_editor, ) - logger.info( + logger.debug( f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}' ) self.system_prompt = codeact_function_calling.SYSTEM_PROMPT diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 7a02166deb0..990cb6deb88 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -170,7 +170,6 @@ async def start_step_loop(self): traceback.print_exc() self.log('error', f'Error while running the agent: {e}') await self._react_to_exception(e) - break await asyncio.sleep(0.1) @@ -256,6 +255,7 @@ async def _handle_message_action(self, action: MessageAction): str(action), extra={'msg_type': 'ACTION', 'event_source': EventSource.USER}, ) + print('GOT MESSAGE ACTION, RESTARTING') if self.get_agent_state() != AgentState.RUNNING: await self.set_agent_state_to(AgentState.RUNNING) elif action.source == EventSource.AGENT and action.wait_for_response: diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index b7fd08ba823..6194f6d3a6c 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -128,6 +128,7 @@ async def _start( async def close(self): """Closes the Agent session""" + print('CLOSING AGENT SESSION') if self._closed: return diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 2784cac5f0d..678982651af 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -52,6 +52,7 @@ def __init__( self.loop = asyncio.get_event_loop() async def close(self): + print('CLOSE SESSION') self.is_alive = False await self.agent_session.close() @@ -67,9 +68,11 @@ async def loop_recv(self): continue await self.dispatch(data) except WebSocketDisconnect: + print('WebSocketDisconnect, closing!') await self.close() logger.debug('WebSocket disconnected, sid: %s', self.sid) except RuntimeError as e: + print('RuntimeError, closing!', e) await self.close() logger.exception('Error in loop_recv: %s', e) @@ -167,12 +170,9 @@ async def dispatch(self, data: dict): ) return if self.agent_session.loop: - asyncio.run_coroutine_threadsafe( - self._add_event(event, EventSource.USER), self.agent_session.loop - ) # type: ignore - - async def _add_event(self, event, event_source): - self.agent_session.event_stream.add_event(event, EventSource.USER) + await self.agent_session.event_stream.async_add_event( + event, EventSource.USER + ) async def send(self, data: dict[str, object]) -> bool: try: From 9c28805718d0c3cfded63764572abece9d5ad154 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Wed, 30 Oct 2024 04:55:20 +0100 Subject: [PATCH 26/60] Update openhands/controller/agent_controller.py --- openhands/controller/agent_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 4e430c05ee9..9b268536e71 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -1,7 +1,7 @@ import asyncio import copy import traceback -from typing import Callable, Optional, Type +from typing import Callable, Type import litellm From b757831adbf51143e7a2e66589dc963a970c7365 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Wed, 30 Oct 2024 04:55:52 +0100 Subject: [PATCH 27/60] Update openhands/controller/agent_controller.py --- openhands/controller/agent_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 9b268536e71..a1e3d98f7b6 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -75,7 +75,7 @@ def __init__( initial_state: State | None = None, is_delegate: bool = False, headless_mode: bool = True, - status_callback: Optional[Callable] = None, + status_callback: Callable | None = None, ): """Initializes a new instance of the AgentController class. From 3e446bb47d1e46d13e92a8cc158c9e1c8af0ea8b Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 16:50:30 -0400 Subject: [PATCH 28/60] delint --- frontend/src/components/AgentStatusBar.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx index 8aeb567f096..7de9ae0397e 100644 --- a/frontend/src/components/AgentStatusBar.tsx +++ b/frontend/src/components/AgentStatusBar.tsx @@ -1,11 +1,11 @@ import React, { useEffect } from "react"; import { useTranslation } from "react-i18next"; import { useSelector } from "react-redux"; +import toast from "react-hot-toast"; import { I18nKey } from "#/i18n/declaration"; import { RootState } from "#/store"; import AgentState from "#/types/AgentState"; import beep from "#/utils/beep"; -import toast from "react-hot-toast"; enum IndicatorColor { BLUE = "bg-blue-500", @@ -95,7 +95,7 @@ function AgentStatusBar() { const [statusMessage, setStatusMessage] = React.useState(""); React.useEffect(() => { - let message = curStatusMessage.message || ''; + let message = curStatusMessage.message || ""; if (curStatusMessage?.id) { const id = curStatusMessage.id.trim(); if (i18n.exists(id)) { @@ -114,7 +114,7 @@ function AgentStatusBar() { }, [curStatusMessage.id]); React.useEffect(() => { - setStatusMessage(AgentStatusMap[curAgentState].message); + setStatusMessage(AgentStatusMap[curAgentState].message); }, [curAgentState]); return ( From 9289cdbd61547b5c3f19e5a409ac5ab3f69a5b02 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 16:56:42 -0400 Subject: [PATCH 29/60] fix threading --- openhands/server/session/agent_session.py | 1 - openhands/server/session/session.py | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index 6194f6d3a6c..b7fd08ba823 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -128,7 +128,6 @@ async def _start( async def close(self): """Closes the Agent session""" - print('CLOSING AGENT SESSION') if self._closed: return diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 678982651af..a67918805db 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -170,9 +170,10 @@ async def dispatch(self, data: dict): ) return if self.agent_session.loop: - await self.agent_session.event_stream.async_add_event( - event, EventSource.USER - ) + asyncio.run_coroutine_threadsafe( + self.agent_session.event_stream.add_event(event, EventSource.USER), + self.agent_session.loop, + ) # type: ignore async def send(self, data: dict[str, object]) -> bool: try: From 52bbdd8b307e64ad383a7eed018440718f2f45a5 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 17:01:11 -0400 Subject: [PATCH 30/60] fix async --- openhands/server/session/session.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index a67918805db..a2ad87d214f 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -171,7 +171,9 @@ async def dispatch(self, data: dict): return if self.agent_session.loop: asyncio.run_coroutine_threadsafe( - self.agent_session.event_stream.add_event(event, EventSource.USER), + self.agent_session.event_stream.async_add_event( + event, EventSource.USER + ), self.agent_session.loop, ) # type: ignore From e78af120808b7f512a492e8605a0098940338d55 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 17:05:59 -0400 Subject: [PATCH 31/60] add id for auth errors --- frontend/src/i18n/translation.json | 6 ++++++ openhands/controller/agent_controller.py | 2 +- openhands/server/session/session.py | 3 --- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json index 795c60e051f..3893d9f4f68 100644 --- a/frontend/src/i18n/translation.json +++ b/frontend/src/i18n/translation.json @@ -1510,5 +1510,11 @@ "ar": "في انتظار جاهزية العميل...", "fr": "En attente que le client soit prêt...", "tr": "İstemcinin hazır olması bekleniyor..." + }, + "STATUS$ERROR_LLM_AUTHENTICATION": { + "en": "Error authenticating with the LLM provider. Please check your API key" + }, + "STATUS$ERROR_RUNTIME_DISCONNECTED": { + "en": "There was an error while connecting to the runtime. Please refresh the page." } } diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 8ffc278a753..4d572f8c390 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -153,7 +153,7 @@ async def _react_to_exception( if self._status_callback is not None: err_id = '' if isinstance(e, litellm.AuthenticationError): - err_id = 'llm_authentication_error' + err_id = 'STATUS$ERROR_LLM_AUTHENTICATION' self._status_callback('error', err_id, str(e)) async def start_step_loop(self): diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index a2ad87d214f..f5bdc20e0c6 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -52,7 +52,6 @@ def __init__( self.loop = asyncio.get_event_loop() async def close(self): - print('CLOSE SESSION') self.is_alive = False await self.agent_session.close() @@ -68,11 +67,9 @@ async def loop_recv(self): continue await self.dispatch(data) except WebSocketDisconnect: - print('WebSocketDisconnect, closing!') await self.close() logger.debug('WebSocket disconnected, sid: %s', self.sid) except RuntimeError as e: - print('RuntimeError, closing!', e) await self.close() logger.exception('Error in loop_recv: %s', e) From 7b7b306bf7e6ec71faa28e4f4ae474ee588cc32e Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 17:35:30 -0400 Subject: [PATCH 32/60] better error messages\, use messages instead of toast --- frontend/src/components/chat-interface.tsx | 2 +- frontend/src/components/error-message.tsx | 25 +++++++++++++++++++--- frontend/src/i18n/translation.json | 6 ++++++ frontend/src/services/actions.ts | 20 +++++++++++------ frontend/src/state/chatSlice.ts | 6 +++--- openhands/controller/agent_controller.py | 1 - openhands/runtime/base.py | 2 +- 7 files changed, 47 insertions(+), 15 deletions(-) diff --git a/frontend/src/components/chat-interface.tsx b/frontend/src/components/chat-interface.tsx index 10786c13923..0eaaef0bb6c 100644 --- a/frontend/src/components/chat-interface.tsx +++ b/frontend/src/components/chat-interface.tsx @@ -106,7 +106,7 @@ export function ChatInterface() { isErrorMessage(message) ? ( ) : ( diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx index cded8c3729f..7bc14c73b41 100644 --- a/frontend/src/components/error-message.tsx +++ b/frontend/src/components/error-message.tsx @@ -1,14 +1,33 @@ +import { useState, useEffect } from "react"; +import { useTranslation } from "react-i18next"; + interface ErrorMessageProps { error: string; message: string; } -export function ErrorMessage({ error, message }: ErrorMessageProps) { +export function ErrorMessage({ id, message }: ErrorMessageProps) { + const { t, i18n } = useTranslation(); + const [showDetails, setShowDetails] = useState(true); + const [headline, setHeadline] = useState(''); + const [details, setDetails] = useState(message); + + useEffect(() => { + if (i18n.exists(id)) { + setHeadline(t(id)); + setDetails(message); + setShowDetails(false); + } + }, [id, message, i18n.language]); + return ( ); diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json index 3893d9f4f68..ab73c4f86e2 100644 --- a/frontend/src/i18n/translation.json +++ b/frontend/src/i18n/translation.json @@ -1441,6 +1441,12 @@ "fr": "Privé", "tr": "Özel" }, + "ERROR_MESSAGE$SHOW_DETAILS": { + "en": "Show details" + }, + "ERROR_MESSAGE$HIDE_DETAILS": { + "en": "Hide details" + }, "STATUS$STARTING_RUNTIME": { "en": "Starting Runtime...", "zh-CN": "启动运行时...", diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index 620f4513113..36e6e473021 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -1,4 +1,4 @@ -import { addAssistantMessage, addUserMessage } from "#/state/chatSlice"; +import { addAssistantMessage, addUserMessage, addErrorMessage } from "#/state/chatSlice"; import { setCode, setActiveFilepath } from "#/state/codeSlice"; import { appendJupyterInput } from "#/state/jupyterSlice"; import { @@ -119,11 +119,19 @@ export function handleActionMessage(message: ActionMessage) { } export function handleStatusMessage(message: StatusMessage) { - store.dispatch( - setCurStatusMessage({ - ...message, - }), - ); + if (message.type === 'info') { + store.dispatch( + setCurStatusMessage({ + ...message, + }), + ); + } else if (message.type === 'error') { + store.dispatch( + addErrorMessage({ + ...message, + }), + ); + } } export function handleAssistantMessage(data: string | SocketMessage) { diff --git a/frontend/src/state/chatSlice.ts b/frontend/src/state/chatSlice.ts index 46f156ebddb..375e3ba260f 100644 --- a/frontend/src/state/chatSlice.ts +++ b/frontend/src/state/chatSlice.ts @@ -39,10 +39,10 @@ export const chatSlice = createSlice({ addErrorMessage( state, - action: PayloadAction<{ error: string; message: string }>, + action: PayloadAction<{ id: string, message: string }>, ) { - const { error, message } = action.payload; - state.messages.push({ error, message }); + const { id, message } = action.payload; + state.messages.push({ id, message, error: true }); }, clearMessages(state) { diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 4d572f8c390..8d6a55e0b52 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -325,7 +325,6 @@ async def set_agent_state_to(self, new_state: AgentState): await self.event_stream.async_add_event( AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT ) - print('sent state change obs') if new_state == AgentState.INIT and self.state.resume_state: await self.set_agent_state_to(self.state.resume_state) diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index d3b236131d1..d5076aadbc5 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -109,7 +109,7 @@ def send_status_message(self, message_id: str): """Sends a status message if the callback function was provided.""" if self.status_callback: msg = STATUS_MESSAGES.get(message_id, '') - self.status_callback('status', message_id, msg) + self.status_callback('info', message_id, msg) # ==================================================================== From baf2c11ea5d873b08aa485aac51e6880d286b398 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 17:47:29 -0400 Subject: [PATCH 33/60] no more retrys --- openhands/runtime/builder/remote.py | 11 ++--- .../impl/eventstream/eventstream_runtime.py | 18 +++---- .../runtime/impl/remote/remote_runtime.py | 40 ++++++---------- openhands/runtime/utils/request.py | 47 +++---------------- 4 files changed, 35 insertions(+), 81 deletions(-) diff --git a/openhands/runtime/builder/remote.py b/openhands/runtime/builder/remote.py index f96afb38eeb..f7000a8244e 100644 --- a/openhands/runtime/builder/remote.py +++ b/openhands/runtime/builder/remote.py @@ -7,7 +7,7 @@ from openhands.core.logger import openhands_logger as logger from openhands.runtime.builder import RuntimeBuilder -from openhands.runtime.utils.request import is_429_error, send_request_with_retry +from openhands.runtime.utils.request import send_request from openhands.runtime.utils.shutdown_listener import ( should_continue, sleep_if_should_continue, @@ -45,13 +45,12 @@ def build(self, path: str, tags: list[str], platform: str | None = None) -> str: files.append(('tags', (None, tag))) # Send the POST request to /build (Begins the build process) - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.api_url}/build', files=files, timeout=30, - retry_fns=[is_429_error], ) if response.status_code != 202: @@ -70,12 +69,11 @@ def build(self, path: str, tags: list[str], platform: str | None = None) -> str: logger.error('Build timed out after 30 minutes') raise RuntimeError('Build timed out after 30 minutes') - status_response = send_request_with_retry( + status_response = send_request( self.session, 'GET', f'{self.api_url}/build_status', params={'build_id': build_id}, - timeout=30, ) if status_response.status_code != 200: @@ -112,12 +110,11 @@ def build(self, path: str, tags: list[str], platform: str | None = None) -> str: def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool: """Checks if an image exists in the remote registry using the /image_exists endpoint.""" params = {'image': image_name} - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.api_url}/image_exists', params=params, - timeout=30, ) if response.status_code != 200: diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index 7ac78b183b9..3362e5162ea 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -36,7 +36,7 @@ from openhands.runtime.builder import DockerRuntimeBuilder from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils import find_available_tcp_port -from openhands.runtime.utils.request import send_request_with_retry +from openhands.runtime.utils.request import send_request from openhands.runtime.utils.runtime_build import build_runtime_image from openhands.utils.tenacity_stop import stop_if_should_exit @@ -304,7 +304,7 @@ def _init_container(self): volumes = None self.log( 'debug', - f'Sandbox workspace: {self.config.workspace_mount_path_in_sandbox}' + f'Sandbox workspace: {self.config.workspace_mount_path_in_sandbox}', ) if self.config.sandbox.browsergym_eval_env is not None: @@ -392,12 +392,12 @@ def _wait_until_alive(self): if not self.log_buffer: raise RuntimeError('Runtime client is not ready.') - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.api_url}/alive', retry_exceptions=[ConnectionRefusedError], - timeout=300, # 5 minutes gives the container time to be alive 🧟‍♂️ + timeout=5, ) if response.status_code == 200: return @@ -486,7 +486,7 @@ def run_action(self, action: Action) -> Observation: assert action.timeout is not None try: - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.api_url}/execute_action', @@ -569,7 +569,7 @@ def copy_to( params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()} - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.api_url}/upload_file', @@ -606,12 +606,12 @@ def list_files(self, path: str | None = None) -> list[str]: if path is not None: data['path'] = path - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.api_url}/list_files', json=data, - timeout=30, # 30 seconds because the container should already be alive + timeout=10, ) if response.status_code == 200: response_json = response.json() @@ -630,7 +630,7 @@ def copy_from(self, path: str) -> bytes: self._refresh_logs() try: params = {'path': path} - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.api_url}/download_files', diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index a628db49654..c9f9d230aa5 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -32,9 +32,7 @@ from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils.command import get_remote_startup_command from openhands.runtime.utils.request import ( - is_404_error, - is_503_error, - send_request_with_retry, + send_request, ) from openhands.runtime.utils.runtime_build import build_runtime_image @@ -127,7 +125,7 @@ def _start_or_attach_to_runtime(self): def _check_existing_runtime(self) -> bool: try: - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.sid}', @@ -160,11 +158,11 @@ def _check_existing_runtime(self) -> bool: def _build_runtime(self): self.log('debug', f'Building RemoteRuntime config:\n{self.config}') - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.config.sandbox.remote_runtime_api_url}/registry_prefix', - timeout=30, + timeout=10, ) response_json = response.json() registry_prefix = response_json['registry_prefix'] @@ -191,12 +189,12 @@ def _build_runtime(self): force_rebuild=self.config.sandbox.force_rebuild_runtime, ) - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.config.sandbox.remote_runtime_api_url}/image_exists', params={'image': self.container_image}, - timeout=30, + timeout=10, ) if response.status_code != 200 or not response.json()['exists']: raise RuntimeError(f'Container image {self.container_image} does not exist') @@ -228,12 +226,11 @@ def _start_runtime(self): } # Start the sandbox using the /start endpoint - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.config.sandbox.remote_runtime_api_url}/start', json=start_request, - timeout=300, ) if response.status_code != 201: raise RuntimeError( @@ -246,7 +243,7 @@ def _start_runtime(self): ) def _resume_runtime(self): - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.config.sandbox.remote_runtime_api_url}/resume', @@ -275,11 +272,10 @@ def _wait_until_alive(self): max_not_found_count = 12 # 2 minutes not_found_count = 0 while not pod_running: - runtime_info_response = send_request_with_retry( + runtime_info_response = send_request( self.session, 'GET', f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}', - timeout=5, ) if runtime_info_response.status_code != 200: raise RuntimeError( @@ -310,16 +306,10 @@ def _wait_until_alive(self): # Pending otherwise - add proper sleep time.sleep(10) - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.runtime_url}/alive', - # Retry 404 & 503 errors for the /alive endpoint - # because the runtime might just be starting up - # and have not registered the endpoint yet - retry_fns=[is_404_error, is_503_error], - # leave enough time for the runtime to start up - timeout=600, ) if response.status_code != 200: msg = f'Runtime (ID={self.runtime_id}) is not alive yet. Status: {response.status_code}.' @@ -332,7 +322,7 @@ def close(self, timeout: int = 10): return if self.runtime_id: try: - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.config.sandbox.remote_runtime_api_url}/stop', @@ -374,7 +364,7 @@ def run_action(self, action: Action) -> Observation: try: request_body = {'action': event_to_dict(action)} self.log('debug', f'Request body: {request_body}') - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.runtime_url}/execute_action', @@ -448,7 +438,7 @@ def copy_to( params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()} - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.runtime_url}/upload_file', @@ -488,7 +478,7 @@ def list_files(self, path: str | None = None) -> list[str]: if path is not None: data['path'] = path - response = send_request_with_retry( + response = send_request( self.session, 'POST', f'{self.runtime_url}/list_files', @@ -517,7 +507,7 @@ def copy_from(self, path: str) -> bytes: """Zip all files in the sandbox and return as a stream of bytes.""" try: params = {'path': path} - response = send_request_with_retry( + response = send_request( self.session, 'GET', f'{self.runtime_url}/download_files', diff --git a/openhands/runtime/utils/request.py b/openhands/runtime/utils/request.py index 6940827a3ad..3b654c7aca4 100644 --- a/openhands/runtime/utils/request.py +++ b/openhands/runtime/utils/request.py @@ -1,22 +1,12 @@ -from typing import Any, Callable, Type +from typing import Any import requests from requests.exceptions import ( ChunkedEncodingError, ConnectionError, ) -from tenacity import ( - retry, - retry_if_exception, - retry_if_exception_type, - stop_after_delay, - wait_exponential, -) from urllib3.exceptions import IncompleteRead -from openhands.core.logger import openhands_logger as logger -from openhands.utils.tenacity_stop import stop_if_should_exit - def is_server_error(exception): return ( @@ -60,37 +50,14 @@ def is_502_error(exception): ] -def send_request_with_retry( +def send_request( session: requests.Session, method: str, url: str, - timeout: int, - retry_exceptions: list[Type[Exception]] | None = None, - retry_fns: list[Callable[[Exception], bool]] | None = None, + timeout: int = 10, **kwargs: Any, ) -> requests.Response: - exceptions_to_catch = retry_exceptions or DEFAULT_RETRY_EXCEPTIONS - retry_condition = retry_if_exception_type( - tuple(exceptions_to_catch) - ) | retry_if_exception(is_502_error) - if retry_fns is not None: - for fn in retry_fns: - retry_condition |= retry_if_exception(fn) - # wait a few more seconds to get the timeout error from client side - kwargs['timeout'] = timeout + 10 - - @retry( - stop=stop_after_delay(timeout) | stop_if_should_exit(), - wait=wait_exponential(multiplier=1, min=4, max=20), - retry=retry_condition, - reraise=True, - before_sleep=lambda retry_state: logger.debug( - f'Retrying {method} request to {url} due to {retry_state.outcome.exception()}. Attempt {retry_state.attempt_number}' - ), - ) - def _send_request_with_retry(): - response = session.request(method, url, **kwargs) - response.raise_for_status() - return response - - return _send_request_with_retry() + kwargs['timeout'] = timeout + response = session.request(method, url, **kwargs) + response.raise_for_status() + return response From 4be71e1c31491cba75357d8cd9ca3dc67d6b85b5 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 17:49:49 -0400 Subject: [PATCH 34/60] fix no headline --- frontend/src/components/error-message.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx index 7bc14c73b41..439067ffc9a 100644 --- a/frontend/src/components/error-message.tsx +++ b/frontend/src/components/error-message.tsx @@ -25,7 +25,7 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) { From 3e07a2dd187a13090d4e4d529bce852969c07115 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 17:54:39 -0400 Subject: [PATCH 35/60] fix call --- openhands/runtime/impl/eventstream/eventstream_runtime.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index 3362e5162ea..318e7d07355 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -396,7 +396,6 @@ def _wait_until_alive(self): self.session, 'GET', f'{self.api_url}/alive', - retry_exceptions=[ConnectionRefusedError], timeout=5, ) if response.status_code == 200: From c6fd5ac218cf64da4a88a2ac1f3de8b58c02f64c Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 19:21:20 -0400 Subject: [PATCH 36/60] runtime errors plumbing through properly --- openhands/runtime/base.py | 25 ++++++++++++++++--- .../impl/eventstream/eventstream_runtime.py | 13 ++++------ openhands/server/listen.py | 10 +++++--- openhands/server/session/agent_session.py | 8 ++++++ openhands/server/session/session.py | 9 ++++--- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index d5076aadbc5..c1471dfc438 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -5,6 +5,8 @@ from abc import abstractmethod from typing import Callable +from requests.exceptions import ConnectionError + from openhands.core.config import AppConfig, SandboxConfig from openhands.core.logger import openhands_logger as logger from openhands.events import EventSource, EventStream, EventStreamSubscriber @@ -111,6 +113,11 @@ def send_status_message(self, message_id: str): msg = STATUS_MESSAGES.get(message_id, '') self.status_callback('info', message_id, msg) + def send_error_message(self, message_id: str, message: str): + if self.status_callback: + print('RUNTIME STATUS CALLBACK', message_id, message) + self.status_callback('error', message_id, message) + # ==================================================================== def add_env_vars(self, env_vars: dict[str, str]) -> None: @@ -145,9 +152,21 @@ async def on_event(self, event: Event) -> None: if event.timeout is None: event.timeout = self.config.sandbox.timeout assert event.timeout is not None - observation: Observation = await call_sync_from_async( - self.run_action, event - ) + try: + observation: Observation = await call_sync_from_async( + self.run_action, event + ) + except Exception as e: + err_id = '' + print('e class', e.__class__) + if isinstance(e, ConnectionError): + err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED' + self.log('error', f'Unexpected error while running action {e}') + self.log('error', f'Problematic action: {str(event)}') + self.send_error_message(err_id, str(e)) + self.close() + return + observation._cause = event.id # type: ignore[attr-defined] observation.tool_call_metadata = event.tool_call_metadata source = event.source if event.source else EventSource.AGENT diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index 318e7d07355..df7b9110676 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -38,6 +38,7 @@ from openhands.runtime.utils import find_available_tcp_port from openhands.runtime.utils.request import send_request from openhands.runtime.utils.runtime_build import build_runtime_image +from openhands.utils.async_utils import call_sync_from_async from openhands.utils.tenacity_stop import stop_if_should_exit @@ -205,21 +206,21 @@ async def connect(self): self.log( 'info', f'Starting runtime with image: {self.runtime_container_image}' ) - self._init_container() + await call_sync_from_async(self._init_container) self.log('info', f'Container started: {self.container_name}') else: - self._attach_to_container() + await call_sync_from_async(self._attach_to_container) if not self.attach_to_existing: self.log('info', f'Waiting for client to become ready at {self.api_url}...') self.send_status_message('STATUS$WAITING_FOR_CLIENT') - self._wait_until_alive() + await call_sync_from_async(self._wait_until_alive) if not self.attach_to_existing: self.log('info', 'Runtime is ready.') if not self.attach_to_existing: - self.setup_initial_env() + await call_sync_from_async(self.setup_initial_env) self.log( 'debug', @@ -384,7 +385,6 @@ def _refresh_logs(self): @tenacity.retry( stop=tenacity.stop_after_delay(120) | stop_if_should_exit(), - wait=tenacity.wait_exponential(multiplier=2, min=1, max=20), reraise=(ConnectionRefusedError,), ) def _wait_until_alive(self): @@ -509,9 +509,6 @@ def run_action(self, action: Action) -> Observation: obs = ErrorObservation( f'Action execution timed out after {action.timeout} seconds.', ) - except Exception as e: - self.log('error', f'Error during action execution: {e}') - obs = ErrorObservation(f'Action execution failed: {str(e)}') self._refresh_logs() return obs diff --git a/openhands/server/listen.py b/openhands/server/listen.py index fc740e80293..b5752dcee1e 100644 --- a/openhands/server/listen.py +++ b/openhands/server/listen.py @@ -236,9 +236,11 @@ async def attach_session(request: Request, call_next): content={'error': 'Invalid token'}, ) + print('CONNECTING TO CONVO') request.state.conversation = await session_manager.attach_to_conversation( request.state.sid ) + print('CONNECTED TO CONVO') if request.state.conversation is None: return JSONResponse( status_code=status.HTTP_404_NOT_FOUND, @@ -477,11 +479,13 @@ async def list_files(request: Request, path: str | None = None): file_list = [f for f in file_list if f not in FILES_TO_IGNORE] - def filter_for_gitignore(file_list, base_path): + async def filter_for_gitignore(file_list, base_path): gitignore_path = os.path.join(base_path, '.gitignore') try: read_action = FileReadAction(gitignore_path) - observation = runtime.run_action(read_action) + observation = await asyncio.create_task( + call_sync_from_async(runtime.run_action, read_action) + ) spec = PathSpec.from_lines( GitWildMatchPattern, observation.content.splitlines() ) @@ -491,7 +495,7 @@ def filter_for_gitignore(file_list, base_path): file_list = [entry for entry in file_list if not spec.match_file(entry)] return file_list - file_list = filter_for_gitignore(file_list, '') + file_list = await filter_for_gitignore(file_list, '') return file_list diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index b7fd08ba823..fc5925cc2c2 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -145,6 +145,10 @@ async def close(self): self._closed = True + async def stop_agent_loop_for_error(self): + if self.controller is not None: + await self.controller.set_agent_state_to(AgentState.ERROR) + def _create_security_analyzer(self, security_analyzer: str | None): """Creates a SecurityAnalyzer instance that will be used to analyze the agent actions @@ -189,6 +193,10 @@ async def _create_runtime( await self.runtime.connect() except Exception as e: logger.error(f'Runtime initialization failed: {e}', exc_info=True) + if self._status_callback: + self._status_callback( + 'error', 'STATUS$ERROR_RUNTIME_DISCONNECTED', str(e) + ) raise if self.runtime is not None: diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index f5bdc20e0c6..158ea05e593 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -197,8 +197,11 @@ async def send_message(self, message: str) -> bool: """Sends a message to the client.""" return await self.send({'message': message}) - async def send_status_message(self, msg_type: str, id: str, message: str) -> bool: + async def _send_status_message(self, msg_type: str, id: str, message: str) -> bool: """Sends a status message to the client.""" + if msg_type == 'error': + await self.agent_session.stop_agent_loop_for_error() + return await self.send( {'status_update': True, 'type': msg_type, 'id': id, 'message': message} ) @@ -217,7 +220,7 @@ def load_from_data(self, data: dict) -> bool: def queue_status_message(self, msg_type: str, id: str, message: str): """Queues a status message to be sent asynchronously.""" - # Ensure the coroutine runs in the main event loop + print('QUEUE STATUS MESSAGE', msg_type, id, message) asyncio.run_coroutine_threadsafe( - self.send_status_message(msg_type, id, message), self.loop + self._send_status_message(msg_type, id, message), self.loop ) From 0012ed4616f4cc77af4638a645968bd6f6c0eb2c Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 19:22:01 -0400 Subject: [PATCH 37/60] logspam --- openhands/runtime/base.py | 2 -- openhands/server/listen.py | 2 -- openhands/server/session/session.py | 1 - 3 files changed, 5 deletions(-) diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index c1471dfc438..48a2905fe9a 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -115,7 +115,6 @@ def send_status_message(self, message_id: str): def send_error_message(self, message_id: str, message: str): if self.status_callback: - print('RUNTIME STATUS CALLBACK', message_id, message) self.status_callback('error', message_id, message) # ==================================================================== @@ -158,7 +157,6 @@ async def on_event(self, event: Event) -> None: ) except Exception as e: err_id = '' - print('e class', e.__class__) if isinstance(e, ConnectionError): err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED' self.log('error', f'Unexpected error while running action {e}') diff --git a/openhands/server/listen.py b/openhands/server/listen.py index b5752dcee1e..dceda9cb952 100644 --- a/openhands/server/listen.py +++ b/openhands/server/listen.py @@ -236,11 +236,9 @@ async def attach_session(request: Request, call_next): content={'error': 'Invalid token'}, ) - print('CONNECTING TO CONVO') request.state.conversation = await session_manager.attach_to_conversation( request.state.sid ) - print('CONNECTED TO CONVO') if request.state.conversation is None: return JSONResponse( status_code=status.HTTP_404_NOT_FOUND, diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 158ea05e593..f4274eae2f8 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -220,7 +220,6 @@ def load_from_data(self, data: dict) -> bool: def queue_status_message(self, msg_type: str, id: str, message: str): """Queues a status message to be sent asynchronously.""" - print('QUEUE STATUS MESSAGE', msg_type, id, message) asyncio.run_coroutine_threadsafe( self._send_status_message(msg_type, id, message), self.loop ) From a81975f60753875e314f339461f949b0d81301fb Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 19:23:36 -0400 Subject: [PATCH 38/60] remove another retry --- openhands/runtime/impl/eventstream/eventstream_runtime.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index df7b9110676..bef8243303d 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -239,10 +239,6 @@ def _init_docker_client() -> docker.DockerClient: ) raise ex - @tenacity.retry( - stop=tenacity.stop_after_attempt(5) | stop_if_should_exit(), - wait=tenacity.wait_exponential(multiplier=1, min=4, max=60), - ) def _init_container(self): try: self.log('debug', 'Preparing to start container...') From 779889698e850f23cf49958d8efdffc57eb5eddf Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 20:28:42 -0400 Subject: [PATCH 39/60] refactor remote runtime error handling --- openhands/runtime/base.py | 12 +- .../runtime/impl/remote/remote_runtime.py | 229 +++++++----------- 2 files changed, 103 insertions(+), 138 deletions(-) diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index 48a2905fe9a..f10066d44e5 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -42,6 +42,14 @@ } +class RuntimeNotReadyError(Exception): + pass + + +class RuntimeDisconnectedError(Exception): + pass + + def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]: ret = {} for key in os.environ: @@ -157,7 +165,9 @@ async def on_event(self, event: Event) -> None: ) except Exception as e: err_id = '' - if isinstance(e, ConnectionError): + if isinstance(e, ConnectionError) or isinstance( + e, RuntimeDisconnectedError + ): err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED' self.log('error', f'Unexpected error while running action {e}') self.log('error', f'Problematic action: {str(event)}') diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index c9f9d230aa5..eac157e0515 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -1,12 +1,11 @@ import os import tempfile import threading -import time from typing import Callable, Optional from zipfile import ZipFile import requests -from requests.exceptions import Timeout +import tenacity from openhands.core.config import AppConfig from openhands.events import EventStream @@ -27,7 +26,11 @@ ) from openhands.events.serialization import event_to_dict, observation_from_dict from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS -from openhands.runtime.base import Runtime +from openhands.runtime.base import ( + Runtime, + RuntimeDisconnectedError, + RuntimeNotReadyError, +) from openhands.runtime.builder.remote import RemoteRuntimeBuilder from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils.command import get_remote_startup_command @@ -35,6 +38,8 @@ send_request, ) from openhands.runtime.utils.runtime_build import build_runtime_image +from openhands.utils.async_utils import call_sync_from_async +from openhands.utils.tenacity_stop import stop_if_should_exit class RemoteRuntime(Runtime): @@ -73,7 +78,7 @@ def __init__( if self.config.workspace_base is not None: self.log( - 'warning', + 'debug', 'Setting workspace_base is not supported in the remote runtime.', ) @@ -84,9 +89,9 @@ def __init__( self.runtime_url: str | None = None async def connect(self): - self._start_or_attach_to_runtime() - self._wait_until_alive() - self.setup_initial_env() + await call_sync_from_async(self._start_or_attach_to_runtime) + await call_sync_from_async(self._wait_until_alive) + await call_sync_from_async(self.setup_initial_env) def _start_or_attach_to_runtime(self): existing_runtime = self._check_existing_runtime() @@ -265,56 +270,50 @@ def _parse_runtime_response(self, response: requests.Response): {'X-Session-API-Key': start_response['session_api_key']} ) + @tenacity.retry( + stop=tenacity.stop_after_delay(120) | stop_if_should_exit(), + reraise=True, + retry=tenacity.retry_if_exception_type(RuntimeNotReadyError), + ) def _wait_until_alive(self): self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}') - # send GET request to /runtime/ - pod_running = False - max_not_found_count = 12 # 2 minutes - not_found_count = 0 - while not pod_running: - runtime_info_response = send_request( + runtime_info_response = send_request( + self.session, + 'GET', + f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}', + ) + if runtime_info_response.status_code != 200: + raise RuntimeError( + f'Failed to get runtime status: {runtime_info_response.status_code}. Response: {runtime_info_response.text}' + ) + runtime_data = runtime_info_response.json() + assert 'runtime_id' in runtime_data + assert 'pod_status' in runtime_data + self.runtime_id = runtime_data['runtime_id'] + pod_status = runtime_data['pod_status'] + if pod_status == 'Ready': + response = send_request( self.session, 'GET', - f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}', + f'{self.runtime_url}/alive', ) - if runtime_info_response.status_code != 200: - raise RuntimeError( - f'Failed to get runtime status: {runtime_info_response.status_code}. Response: {runtime_info_response.text}' - ) - runtime_data = runtime_info_response.json() - assert runtime_data['runtime_id'] == self.runtime_id - pod_status = runtime_data['pod_status'] - self.log( - 'debug', - f'Waiting for runtime pod to be active. Current status: {pod_status}', + if response.status_code != 200: + msg = f'Runtime (ID={self.runtime_id}) is not alive yet, even though it reported Ready. Status: {response.status_code}.' + self.log('warning', msg) + raise RuntimeError(msg) + return + if pod_status in ('Failed', 'Unknown', 'Not Found'): + # clean up the runtime + self.close() + raise RuntimeError( + f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}' ) - if pod_status == 'Ready': - pod_running = True - break - elif pod_status == 'Not Found' and not_found_count < max_not_found_count: - not_found_count += 1 - self.log( - 'debug', - f'Runtime pod not found. Count: {not_found_count} / {max_not_found_count}', - ) - elif pod_status in ('Failed', 'Unknown', 'Not Found'): - # clean up the runtime - self.close() - raise RuntimeError( - f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}' - ) - # Pending otherwise - add proper sleep - time.sleep(10) - response = send_request( - self.session, - 'GET', - f'{self.runtime_url}/alive', + self.log( + 'debug', + f'Waiting for runtime pod to be active. Current status: {pod_status}', ) - if response.status_code != 200: - msg = f'Runtime (ID={self.runtime_id}) is not alive yet. Status: {response.status_code}.' - self.log('warning', msg) - raise RuntimeError(msg) + raise RuntimeNotReadyError() def close(self, timeout: int = 10): if self.config.sandbox.keep_remote_runtime_alive or self.attach_to_existing: @@ -371,27 +370,31 @@ def run_action(self, action: Action) -> Observation: json=request_body, timeout=action.timeout, ) - if response.status_code == 200: - output = response.json() - obs = observation_from_dict(output) - obs._cause = action.id # type: ignore[attr-defined] - return obs - else: - error_message = response.text - self.log('error', f'Error from server: {error_message}') - obs = ErrorObservation(f'Action execution failed: {error_message}') - except Timeout: - self.log('error', 'No response received within the timeout period.') + output = response.json() + obs = observation_from_dict(output) + obs._cause = action.id # type: ignore[attr-defined] + except requests.Timeout: obs = ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action execution timed out', ) - except Exception as e: - self.log('error', f'Error during action execution: {e}') - obs = ErrorObservation( - f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}', - ) + return obs return obs + def _send_request(self, *args, **kwargs): + try: + return send_request(self.session, *args, **kwargs) + except requests.Timeout: + self.log('error', 'No response received within the timeout period.') + raise + except requests.HTTPError as e: + print('got http error', e) + if e.response.status_code == 404: + raise RuntimeDisconnectedError() + else: + raise e + except Exception as e: + print('Got unknown exception', e.__class__) + def run(self, action: CmdRunAction) -> Observation: return self.run_action(action) @@ -438,32 +441,16 @@ def copy_to( params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()} - response = send_request( - self.session, + response = self._send_request( 'POST', f'{self.runtime_url}/upload_file', files=upload_data, params=params, timeout=300, ) - if response.status_code == 200: - self.log( - 'debug', - f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}', - ) - return - else: - error_message = response.text - raise Exception( - f'[Runtime (ID={self.runtime_id})] Copy operation failed: {error_message}' - ) - except TimeoutError: - raise TimeoutError( - f'[Runtime (ID={self.runtime_id})] Copy operation timed out' - ) - except Exception as e: - raise RuntimeError( - f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}' + self.log( + 'debug', + f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}', ) finally: if recursive: @@ -473,59 +460,27 @@ def copy_to( ) def list_files(self, path: str | None = None) -> list[str]: - try: - data = {} - if path is not None: - data['path'] = path + data = {} + if path is not None: + data['path'] = path - response = send_request( - self.session, - 'POST', - f'{self.runtime_url}/list_files', - json=data, - timeout=30, - ) - if response.status_code == 200: - response_json = response.json() - assert isinstance(response_json, list) - return response_json - else: - error_message = response.text - raise Exception( - f'[Runtime (ID={self.runtime_id})] List files operation failed: {error_message}' - ) - except TimeoutError: - raise TimeoutError( - f'[Runtime (ID={self.runtime_id})] List files operation timed out' - ) - except Exception as e: - raise RuntimeError( - f'[Runtime (ID={self.runtime_id})] List files operation failed: {str(e)}' - ) + response = self._send_request( + 'POST', + f'{self.runtime_url}/list_files', + json=data, + timeout=30, + ) + response_json = response.json() + assert isinstance(response_json, list) + return response_json def copy_from(self, path: str) -> bytes: """Zip all files in the sandbox and return as a stream of bytes.""" - try: - params = {'path': path} - response = send_request( - self.session, - 'GET', - f'{self.runtime_url}/download_files', - params=params, - timeout=30, - ) - if response.status_code == 200: - return response.content - else: - error_message = response.text - raise Exception( - f'[Runtime (ID={self.runtime_id})] Copy operation failed: {error_message}' - ) - except requests.Timeout: - raise TimeoutError( - f'[Runtime (ID={self.runtime_id})] Copy operation timed out' - ) - except Exception as e: - raise RuntimeError( - f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}' - ) + params = {'path': path} + response = self._send_request( + 'GET', + f'{self.runtime_url}/download_files', + params=params, + timeout=30, + ) + return response.content From c5369d8142909badb1ff9f5762993277d1e6d9c0 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 20:47:54 -0400 Subject: [PATCH 40/60] more refactoring --- .../runtime/impl/remote/remote_runtime.py | 96 +++++++------------ 1 file changed, 35 insertions(+), 61 deletions(-) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index eac157e0515..a29ce5b29c8 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -130,41 +130,37 @@ def _start_or_attach_to_runtime(self): def _check_existing_runtime(self) -> bool: try: - response = send_request( - self.session, + response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.sid}', timeout=5, ) - except Exception as e: + except requests.HTTPError as e: + if e.response.status_code == 404: + return False self.log('debug', f'Error while looking for remote runtime: {e}') - return False + raise - if response.status_code == 200: - data = response.json() - status = data.get('status') - if status == 'running': - self._parse_runtime_response(response) - return True - elif status == 'stopped': - self.log('debug', 'Found existing remote runtime, but it is stopped') - return False - elif status == 'paused': - self.log('debug', 'Found existing remote runtime, but it is paused') - self._parse_runtime_response(response) - self._resume_runtime() - return True - else: - self.log('error', f'Invalid response from runtime API: {data}') - return False + data = response.json() + status = data.get('status') + if status == 'running': + self._parse_runtime_response(response) + return True + elif status == 'stopped': + self.log('debug', 'Found existing remote runtime, but it is stopped') + return False + elif status == 'paused': + self.log('debug', 'Found existing remote runtime, but it is paused') + self._parse_runtime_response(response) + self._resume_runtime() + return True else: - self.log('debug', 'Could not find existing remote runtime') + self.log('error', f'Invalid response from runtime API: {data}') return False def _build_runtime(self): self.log('debug', f'Building RemoteRuntime config:\n{self.config}') - response = send_request( - self.session, + response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/registry_prefix', timeout=10, @@ -194,14 +190,13 @@ def _build_runtime(self): force_rebuild=self.config.sandbox.force_rebuild_runtime, ) - response = send_request( - self.session, + response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/image_exists', params={'image': self.container_image}, timeout=10, ) - if response.status_code != 200 or not response.json()['exists']: + if not response.json()['exists']: raise RuntimeError(f'Container image {self.container_image} does not exist') def _start_runtime(self): @@ -231,16 +226,11 @@ def _start_runtime(self): } # Start the sandbox using the /start endpoint - response = send_request( - self.session, + response = self._send_request( 'POST', f'{self.config.sandbox.remote_runtime_api_url}/start', json=start_request, ) - if response.status_code != 201: - raise RuntimeError( - f'[Runtime (ID={self.runtime_id})] Failed to start runtime: {response.text}' - ) self._parse_runtime_response(response) self.log( 'debug', @@ -248,17 +238,12 @@ def _start_runtime(self): ) def _resume_runtime(self): - response = send_request( - self.session, + self._send_request( 'POST', f'{self.config.sandbox.remote_runtime_api_url}/resume', json={'runtime_id': self.runtime_id}, timeout=30, ) - if response.status_code != 200: - raise RuntimeError( - f'[Runtime (ID={self.runtime_id})] Failed to resume runtime: {response.text}' - ) self.log('debug', 'Runtime resumed.') def _parse_runtime_response(self, response: requests.Response): @@ -277,30 +262,20 @@ def _parse_runtime_response(self, response: requests.Response): ) def _wait_until_alive(self): self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}') - runtime_info_response = send_request( - self.session, + runtime_info_response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/runtime/{self.runtime_id}', ) - if runtime_info_response.status_code != 200: - raise RuntimeError( - f'Failed to get runtime status: {runtime_info_response.status_code}. Response: {runtime_info_response.text}' - ) runtime_data = runtime_info_response.json() assert 'runtime_id' in runtime_data + assert runtime_data['runtime_id'] == self.runtime_id assert 'pod_status' in runtime_data - self.runtime_id = runtime_data['runtime_id'] pod_status = runtime_data['pod_status'] if pod_status == 'Ready': - response = send_request( - self.session, + self._send_request( 'GET', f'{self.runtime_url}/alive', - ) - if response.status_code != 200: - msg = f'Runtime (ID={self.runtime_id}) is not alive yet, even though it reported Ready. Status: {response.status_code}.' - self.log('warning', msg) - raise RuntimeError(msg) + ) # will raise exception if we don't get 200 back return if pod_status in ('Failed', 'Unknown', 'Not Found'): # clean up the runtime @@ -319,10 +294,9 @@ def close(self, timeout: int = 10): if self.config.sandbox.keep_remote_runtime_alive or self.attach_to_existing: self.session.close() return - if self.runtime_id: + if self.runtime_id and self.session: try: - response = send_request( - self.session, + response = self._send_request( 'POST', f'{self.config.sandbox.remote_runtime_api_url}/stop', json={'runtime_id': self.runtime_id}, @@ -363,8 +337,7 @@ def run_action(self, action: Action) -> Observation: try: request_body = {'action': event_to_dict(action)} self.log('debug', f'Request body: {request_body}') - response = send_request( - self.session, + response = self._send_request( 'POST', f'{self.runtime_url}/execute_action', json=request_body, @@ -380,15 +353,16 @@ def run_action(self, action: Action) -> Observation: return obs return obs - def _send_request(self, *args, **kwargs): + def _send_request(self, method, url, **kwargs): + is_runtime_request = self.runtime_url in url try: - return send_request(self.session, *args, **kwargs) + return send_request(self.session, method, url, **kwargs) except requests.Timeout: self.log('error', 'No response received within the timeout period.') raise except requests.HTTPError as e: print('got http error', e) - if e.response.status_code == 404: + if is_runtime_request and e.response.status_code == 404: raise RuntimeDisconnectedError() else: raise e From b72239f4967b604ad434dd174b0724a7d8f052c0 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 20:56:51 -0400 Subject: [PATCH 41/60] minor fixes --- openhands/runtime/impl/remote/remote_runtime.py | 9 ++++----- openhands/server/listen.py | 8 ++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index a29ce5b29c8..480f4978e86 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -354,20 +354,19 @@ def run_action(self, action: Action) -> Observation: return obs def _send_request(self, method, url, **kwargs): - is_runtime_request = self.runtime_url in url + is_runtime_request = self.runtime_url and self.runtime_url in url try: return send_request(self.session, method, url, **kwargs) except requests.Timeout: self.log('error', 'No response received within the timeout period.') raise except requests.HTTPError as e: - print('got http error', e) if is_runtime_request and e.response.status_code == 404: - raise RuntimeDisconnectedError() + raise RuntimeDisconnectedError( + f'404 error while connecting to {self.runtime_url}' + ) else: raise e - except Exception as e: - print('Got unknown exception', e.__class__) def run(self, action: CmdRunAction) -> Observation: return self.run_action(action) diff --git a/openhands/server/listen.py b/openhands/server/listen.py index dceda9cb952..fa2e0c55e10 100644 --- a/openhands/server/listen.py +++ b/openhands/server/listen.py @@ -469,9 +469,7 @@ async def list_files(request: Request, path: str | None = None): ) runtime: Runtime = request.state.conversation.runtime - file_list = await asyncio.create_task( - call_sync_from_async(runtime.list_files, path) - ) + file_list = await call_sync_from_async(runtime.list_files, path) if path: file_list = [os.path.join(path, f) for f in file_list] @@ -481,9 +479,7 @@ async def filter_for_gitignore(file_list, base_path): gitignore_path = os.path.join(base_path, '.gitignore') try: read_action = FileReadAction(gitignore_path) - observation = await asyncio.create_task( - call_sync_from_async(runtime.run_action, read_action) - ) + observation = await call_sync_from_async(runtime.run_action, read_action) spec = PathSpec.from_lines( GitWildMatchPattern, observation.content.splitlines() ) From b8f221fd35a654ae0096b048977b47a4e74dabb2 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 21:07:47 -0400 Subject: [PATCH 42/60] fix session init --- openhands/runtime/impl/remote/remote_runtime.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 480f4978e86..b47f48fb991 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -57,6 +57,10 @@ def __init__( status_callback: Optional[Callable] = None, attach_to_existing: bool = False, ): + # We need to set session and action_semaphore before the __init__ below, or we get odd errors + self.session = requests.Session() + self.action_semaphore = threading.Semaphore(1) + super().__init__( config, event_stream, @@ -66,15 +70,12 @@ def __init__( status_callback, attach_to_existing, ) - if self.config.sandbox.api_key is None: raise ValueError( 'API key is required to use the remote runtime. ' 'Please set the API key in the config (config.toml) or as an environment variable (SANDBOX_API_KEY).' ) - self.session = requests.Session() self.session.headers.update({'X-API-Key': self.config.sandbox.api_key}) - self.action_semaphore = threading.Semaphore(1) if self.config.workspace_base is not None: self.log( From 29930514827fdd55bc55496d36fd991c03ea4426 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Wed, 30 Oct 2024 21:10:19 -0400 Subject: [PATCH 43/60] add fixme --- openhands/controller/agent_controller.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 8d6a55e0b52..35a5c4a5bbd 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -544,6 +544,7 @@ async def _handle_traffic_control( f'Agent reached maximum {limit_type}, task paused. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' ) + # FIXME: this isn't really an exception--we should have a different path await self._react_to_exception( e, new_state=AgentState.PAUSED, From 43eb0d3aece5be15dcced7d8c9cfbb1668076e6e Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 11:54:23 -0400 Subject: [PATCH 44/60] fix lint --- frontend/src/components/error-message.tsx | 21 ++++++++++++++------- frontend/src/services/actions.ts | 10 +++++++--- frontend/src/state/chatSlice.ts | 2 +- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx index 439067ffc9a..88b02aae15c 100644 --- a/frontend/src/components/error-message.tsx +++ b/frontend/src/components/error-message.tsx @@ -2,14 +2,14 @@ import { useState, useEffect } from "react"; import { useTranslation } from "react-i18next"; interface ErrorMessageProps { - error: string; + id: string; message: string; } export function ErrorMessage({ id, message }: ErrorMessageProps) { const { t, i18n } = useTranslation(); const [showDetails, setShowDetails] = useState(true); - const [headline, setHeadline] = useState(''); + const [headline, setHeadline] = useState(""); const [details, setDetails] = useState(message); useEffect(() => { @@ -23,11 +23,18 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) { return (
- {(headline &&

{headline}

)} - setShowDetails(!showDetails)} className="cursor-pointer"> - {(headline && (showDetails ? t('ERROR_MESSAGE$HIDE_DETAILS') : t('ERROR_MESSAGE$SHOW_DETAILS')))} - - {(showDetails &&

{details}

)} + {headline &&

{headline}

} + + {showDetails &&

{details}

}
); diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index 36e6e473021..ccdff694e87 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -1,4 +1,8 @@ -import { addAssistantMessage, addUserMessage, addErrorMessage } from "#/state/chatSlice"; +import { + addAssistantMessage, + addUserMessage, + addErrorMessage, +} from "#/state/chatSlice"; import { setCode, setActiveFilepath } from "#/state/codeSlice"; import { appendJupyterInput } from "#/state/jupyterSlice"; import { @@ -119,13 +123,13 @@ export function handleActionMessage(message: ActionMessage) { } export function handleStatusMessage(message: StatusMessage) { - if (message.type === 'info') { + if (message.type === "info") { store.dispatch( setCurStatusMessage({ ...message, }), ); - } else if (message.type === 'error') { + } else if (message.type === "error") { store.dispatch( addErrorMessage({ ...message, diff --git a/frontend/src/state/chatSlice.ts b/frontend/src/state/chatSlice.ts index 375e3ba260f..996d731a11b 100644 --- a/frontend/src/state/chatSlice.ts +++ b/frontend/src/state/chatSlice.ts @@ -39,7 +39,7 @@ export const chatSlice = createSlice({ addErrorMessage( state, - action: PayloadAction<{ id: string, message: string }>, + action: PayloadAction<{ id: string; message: string }>, ) { const { id, message } = action.payload; state.messages.push({ id, message, error: true }); From 1002d44ade6843dedfe1eabca983166b739fbb5f Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 11:55:18 -0400 Subject: [PATCH 45/60] change test name --- tests/unit/test_agent_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index e6de80d274b..751bf6231bb 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -97,7 +97,7 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream) @pytest.mark.asyncio -async def test__react_to_error(mock_agent, mock_event_stream): +async def test_react_to_exception(mock_agent, mock_event_stream): controller = AgentController( agent=mock_agent, event_stream=mock_event_stream, From 4f69d385abcdaa2f136443f3c8da0661b6e033bb Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 12:55:16 -0400 Subject: [PATCH 46/60] lint --- frontend/src/components/chat/message.d.ts | 3 ++- frontend/src/routes/_oh.app.tsx | 2 +- frontend/src/state/statusSlice.ts | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/chat/message.d.ts b/frontend/src/components/chat/message.d.ts index e7248fbd648..6dd3181e93c 100644 --- a/frontend/src/components/chat/message.d.ts +++ b/frontend/src/components/chat/message.d.ts @@ -6,6 +6,7 @@ type Message = { }; type ErrorMessage = { - error: string; + error: boolean; + id: string; message: string; }; diff --git a/frontend/src/routes/_oh.app.tsx b/frontend/src/routes/_oh.app.tsx index 9af5ad278bb..b73d4dc2d66 100644 --- a/frontend/src/routes/_oh.app.tsx +++ b/frontend/src/routes/_oh.app.tsx @@ -196,7 +196,7 @@ function App() { }), ); } else { - dispatch(addErrorMessage({ error, message: details })); + dispatch(addErrorMessage({ message: details })); } }; diff --git a/frontend/src/state/statusSlice.ts b/frontend/src/state/statusSlice.ts index b0b503d6c6b..6f5158c9f6d 100644 --- a/frontend/src/state/statusSlice.ts +++ b/frontend/src/state/statusSlice.ts @@ -2,8 +2,10 @@ import { createSlice, PayloadAction } from "@reduxjs/toolkit"; import { StatusMessage } from "#/types/Message"; const initialStatusMessage: StatusMessage = { - status: "", - is_error: false, + status_update: true, + type: "info", + id: "", + message: "", }; export const statusSlice = createSlice({ From 5efc862d215a28bf18698d3004d281232bff2e84 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 14:04:04 -0400 Subject: [PATCH 47/60] fix tests --- frontend/__tests__/components/chat/chat-interface.test.tsx | 4 ++-- frontend/src/components/chat/message.d.ts | 2 +- frontend/src/components/error-message.tsx | 4 ++-- frontend/src/routes/_oh.app.tsx | 1 - frontend/src/state/chatSlice.ts | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/frontend/__tests__/components/chat/chat-interface.test.tsx b/frontend/__tests__/components/chat/chat-interface.test.tsx index d27897c2d8e..501389f9897 100644 --- a/frontend/__tests__/components/chat/chat-interface.test.tsx +++ b/frontend/__tests__/components/chat/chat-interface.test.tsx @@ -128,14 +128,14 @@ describe.skip("ChatInterface", () => { timestamp: new Date().toISOString(), }, { - error: "Woops!", + error: true, + id: "", message: "Something went wrong", }, ]; renderChatInterface(messages); const error = screen.getByTestId("error-message"); - expect(within(error).getByText("Woops!")).toBeInTheDocument(); expect(within(error).getByText("Something went wrong")).toBeInTheDocument(); }); diff --git a/frontend/src/components/chat/message.d.ts b/frontend/src/components/chat/message.d.ts index 6dd3181e93c..b2ccf43c994 100644 --- a/frontend/src/components/chat/message.d.ts +++ b/frontend/src/components/chat/message.d.ts @@ -7,6 +7,6 @@ type Message = { type ErrorMessage = { error: boolean; - id: string; + id?: string; message: string; }; diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx index 88b02aae15c..75165a9a60f 100644 --- a/frontend/src/components/error-message.tsx +++ b/frontend/src/components/error-message.tsx @@ -2,7 +2,7 @@ import { useState, useEffect } from "react"; import { useTranslation } from "react-i18next"; interface ErrorMessageProps { - id: string; + id?: string; message: string; } @@ -13,7 +13,7 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) { const [details, setDetails] = useState(message); useEffect(() => { - if (i18n.exists(id)) { + if (id && i18n.exists(id)) { setHeadline(t(id)); setDetails(message); setShowDetails(false); diff --git a/frontend/src/routes/_oh.app.tsx b/frontend/src/routes/_oh.app.tsx index b73d4dc2d66..cb1ef3b9c7e 100644 --- a/frontend/src/routes/_oh.app.tsx +++ b/frontend/src/routes/_oh.app.tsx @@ -191,7 +191,6 @@ function App() { if (!details) { dispatch( addErrorMessage({ - error: "An error has occured", message: error, }), ); diff --git a/frontend/src/state/chatSlice.ts b/frontend/src/state/chatSlice.ts index 996d731a11b..7d77901fee7 100644 --- a/frontend/src/state/chatSlice.ts +++ b/frontend/src/state/chatSlice.ts @@ -39,7 +39,7 @@ export const chatSlice = createSlice({ addErrorMessage( state, - action: PayloadAction<{ id: string; message: string }>, + action: PayloadAction<{ id?: string; message: string }>, ) { const { id, message } = action.payload; state.messages.push({ id, message, error: true }); From 75d8028fe6bca9258e2f42450a941f8e0cdccd55 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 14:22:18 -0400 Subject: [PATCH 48/60] fix some tests --- openhands/controller/agent_controller.py | 12 +++--------- tests/unit/test_agent_controller.py | 11 +++++------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 35a5c4a5bbd..3027f57a8d5 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -145,11 +145,8 @@ async def update_state_after_step(self): async def _react_to_exception( self, e: Exception, - new_state: AgentState = AgentState.ERROR, ): - if new_state is not None: - # it's important to set the state before adding the error event, so that metrics sync properly - await self.set_agent_state_to(new_state) + await self.set_agent_state_to(AgentState.ERROR) if self._status_callback is not None: err_id = '' if isinstance(e, litellm.AuthenticationError): @@ -541,14 +538,11 @@ async def _handle_traffic_control( await self._react_to_exception(e) else: e = RuntimeError( - f'Agent reached maximum {limit_type}, task paused. ' + f'Agent reached maximum {limit_type}. ' f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. ' ) # FIXME: this isn't really an exception--we should have a different path - await self._react_to_exception( - e, - new_state=AgentState.PAUSED, - ) + await self._react_to_exception(e) stop_step = True return stop_step diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 751bf6231bb..79ba0ecdbd1 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -7,7 +7,6 @@ from openhands.controller.agent_controller import AgentController from openhands.controller.state.state import TrafficControlState from openhands.core.config import AppConfig -from openhands.core.exceptions import LLMMalformedActionError from openhands.core.main import run_controller from openhands.core.schema import AgentState from openhands.events import Event, EventSource, EventStream, EventStreamSubscriber @@ -107,7 +106,7 @@ async def test_react_to_exception(mock_agent, mock_event_stream): headless_mode=True, ) error_message = 'Test error' - await controller._react_to_error(error_message) + await controller._react_to_exception(error_message) assert controller.state.last_error == error_message controller.event_stream.add_event.assert_called_once() await controller.close() @@ -124,12 +123,12 @@ async def test_step_with_exception(mock_agent, mock_event_stream): headless_mode=True, ) controller.state.agent_state = AgentState.RUNNING - controller._react_to_error = AsyncMock() - controller.agent.step.side_effect = LLMMalformedActionError('Malformed action') + controller._react_to_exception = AsyncMock() + controller.agent.step.side_effect = RuntimeError('Test error') await controller._step() - # Verify that _react_to_error was called with the correct error message - controller._react_to_error.assert_called_once_with('Malformed action') + # Verify that _react_to_exception was called with the correct error message + controller._react_to_exception.assert_called_once_with('') await controller.close() From fa25ad1b246d326ff8b2dd4d7755b35efcce48f7 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 14:24:22 -0400 Subject: [PATCH 49/60] better names --- tests/unit/test_agent_controller.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 79ba0ecdbd1..2cc43f9a724 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -146,14 +146,14 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream): agent.llm.metrics = Metrics() agent.llm.config = config.get_llm_config() - fatal_error_obs = ErrorObservation('Fatal error detected', fatal=True) - fatal_error_obs._cause = event.id + error_obs = ErrorObservation('You messed around with Jim') + error_obs._cause = event.id runtime = MagicMock(spec=Runtime) async def on_event(event: Event): if isinstance(event, CmdRunAction): - await event_stream.async_add_event(fatal_error_obs, EventSource.USER) + await event_stream.async_add_event(error_obs, EventSource.USER) event_stream.subscribe(EventStreamSubscriber.RUNTIME, on_event) runtime.event_stream = event_stream @@ -172,10 +172,7 @@ async def on_event(event: Event): # it will first become AgentState.ERROR, then become AgentState.STOPPED # in side run_controller (since the while loop + sleep no longer loop) assert state.agent_state == AgentState.STOPPED - assert ( - state.get_last_error() - == 'There was a fatal error during agent execution: **ErrorObservation**\nFatal error detected' - ) + assert state.get_last_error() == 'You messed around with Jim' assert len(list(event_stream.get_events())) == 5 From 0a5e90028afc0fe7b3365789e8d6d2fe35f5d9cb Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 14:25:16 -0400 Subject: [PATCH 50/60] fix message --- tests/unit/test_agent_controller.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index 2cc43f9a724..c4f9b97b0ad 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -242,10 +242,7 @@ async def on_event(event: Event): # it will first become AgentState.ERROR, then become AgentState.STOPPED # in side run_controller (since the while loop + sleep no longer loop) assert state.agent_state == AgentState.STOPPED - assert ( - state.last_error - == 'There was a fatal error during agent execution: **FatalErrorObservation**\nAgent got stuck in a loop' - ) + assert state.last_error == 'Agent got stuck in a loop' @pytest.mark.asyncio From 019aed60937fa07914c5290bff7003dcaf1dd019 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 14:34:54 -0400 Subject: [PATCH 51/60] fix more tests --- tests/unit/test_agent_controller.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index c4f9b97b0ad..25a1124472f 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -43,6 +43,11 @@ def mock_event_stream(): return MagicMock(spec=EventStream) +@pytest.fixture +def mock_status_callback(): + return AsyncMock() + + @pytest.mark.asyncio async def test_set_agent_state(mock_agent, mock_event_stream): controller = AgentController( @@ -96,19 +101,19 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream) @pytest.mark.asyncio -async def test_react_to_exception(mock_agent, mock_event_stream): +async def test_react_to_exception(mock_agent, mock_event_stream, mock_status_callback): controller = AgentController( agent=mock_agent, event_stream=mock_event_stream, + status_callback=mock_status_callback, max_iterations=10, sid='test', confirmation_mode=False, headless_mode=True, ) error_message = 'Test error' - await controller._react_to_exception(error_message) - assert controller.state.last_error == error_message - controller.event_stream.add_event.assert_called_once() + await controller._react_to_exception(RuntimeError(error_message)) + controller._status_callback.assert_called_once() await controller.close() @@ -242,7 +247,7 @@ async def on_event(event: Event): # it will first become AgentState.ERROR, then become AgentState.STOPPED # in side run_controller (since the while loop + sleep no longer loop) assert state.agent_state == AgentState.STOPPED - assert state.last_error == 'Agent got stuck in a loop' + assert state.get_last_error() == 'Agent got stuck in a loop' @pytest.mark.asyncio From 0793e9fc9dafdd13e66d0f561c37fc6660986412 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 14:48:44 -0400 Subject: [PATCH 52/60] remove redundant 200 checks --- .../impl/eventstream/eventstream_runtime.py | 52 ++++--------------- 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index bef8243303d..ddc47d0c66a 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -388,18 +388,12 @@ def _wait_until_alive(self): if not self.log_buffer: raise RuntimeError('Runtime client is not ready.') - response = send_request( + send_request( self.session, 'GET', f'{self.api_url}/alive', timeout=5, ) - if response.status_code == 200: - return - else: - msg = f'Action execution API is not alive. Response: {response}' - self.log('error', msg) - raise RuntimeError(msg) def close(self, rm_all_containers: bool = True): """Closes the EventStreamRuntime and associated objects @@ -488,18 +482,9 @@ def run_action(self, action: Action) -> Observation: json={'action': event_to_dict(action)}, timeout=action.timeout, ) - if response.status_code == 200: - output = response.json() - obs = observation_from_dict(output) - obs._cause = action.id # type: ignore[attr-defined] - else: - self.log('debug', f'action: {action}') - self.log('debug', f'response: {response}') - error_message = response.text - self.log('error', f'Error from server: {error_message}') - obs = ErrorObservation( - f'Action execution failed: {error_message}', - ) + output = response.json() + obs = observation_from_dict(output) + obs._cause = action.id # type: ignore[attr-defined] except requests.Timeout: self.log('error', 'No response received within the timeout period.') obs = ErrorObservation( @@ -561,7 +546,7 @@ def copy_to( params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()} - response = send_request( + send_request( self.session, 'POST', f'{self.api_url}/upload_file', @@ -569,11 +554,6 @@ def copy_to( params=params, timeout=300, ) - if response.status_code == 200: - return - else: - error_message = response.text - raise Exception(f'Copy operation failed: {error_message}') except requests.Timeout: raise TimeoutError('Copy operation timed out') @@ -605,17 +585,11 @@ def list_files(self, path: str | None = None) -> list[str]: json=data, timeout=10, ) - if response.status_code == 200: - response_json = response.json() - assert isinstance(response_json, list) - return response_json - else: - error_message = response.text - raise Exception(f'List files operation failed: {error_message}') + response_json = response.json() + assert isinstance(response_json, list) + return response_json except requests.Timeout: raise TimeoutError('List files operation timed out') - except Exception as e: - raise RuntimeError(f'List files operation failed: {str(e)}') def copy_from(self, path: str) -> bytes: """Zip all files in the sandbox and return as a stream of bytes.""" @@ -630,16 +604,10 @@ def copy_from(self, path: str) -> bytes: stream=True, timeout=30, ) - if response.status_code == 200: - data = response.content - return data - else: - error_message = response.text - raise Exception(f'Copy operation failed: {error_message}') + data = response.content + return data except requests.Timeout: raise TimeoutError('Copy operation timed out') - except Exception as e: - raise RuntimeError(f'Copy operation failed: {str(e)}') def _is_port_in_use_docker(self, port): containers = self.docker_client.containers.list() From a021dada688936771afeeffdf4c36beb1e94e3c9 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 15:08:51 -0400 Subject: [PATCH 53/60] fix tests --- tests/unit/test_agent_controller.py | 55 +++++++++-------------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index dbd4bb701b5..b4d7fcddd2f 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -117,26 +117,6 @@ async def test_react_to_exception(mock_agent, mock_event_stream, mock_status_cal await controller.close() -@pytest.mark.asyncio -async def test_step_with_exception(mock_agent, mock_event_stream): - controller = AgentController( - agent=mock_agent, - event_stream=mock_event_stream, - max_iterations=10, - sid='test', - confirmation_mode=False, - headless_mode=True, - ) - controller.state.agent_state = AgentState.RUNNING - controller._react_to_exception = AsyncMock() - controller.agent.step.side_effect = RuntimeError('Test error') - await controller._step() - - # Verify that _react_to_exception was called with the correct error message - controller._react_to_exception.assert_called_once_with('') - await controller.close() - - @pytest.mark.asyncio async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream): config = AppConfig() @@ -144,20 +124,23 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream): event_stream = EventStream(sid='test', file_store=file_store) agent = MagicMock(spec=Agent) - # a random message to send to the runtime - event = CmdRunAction(command='ls') - agent.step.return_value = event + agent = MagicMock(spec=Agent) + + def agent_step_fn(state): + print(f'agent_step_fn received state: {state}') + return CmdRunAction(command='ls') + + agent.step = agent_step_fn agent.llm = MagicMock(spec=LLM) agent.llm.metrics = Metrics() agent.llm.config = config.get_llm_config() - error_obs = ErrorObservation('You messed around with Jim') - error_obs._cause = event.id - runtime = MagicMock(spec=Runtime) async def on_event(event: Event): if isinstance(event, CmdRunAction): + error_obs = ErrorObservation('You messed around with Jim') + error_obs._cause = event.id await event_stream.async_add_event(error_obs, EventSource.USER) event_stream.subscribe(EventStreamSubscriber.RUNTIME, on_event) @@ -173,27 +156,25 @@ async def on_event(event: Event): ) print(f'state: {state}') print(f'event_stream: {list(event_stream.get_events())}') - assert state.iteration == 1 + assert state.iteration == 4 # it will first become AgentState.ERROR, then become AgentState.STOPPED # in side run_controller (since the while loop + sleep no longer loop) assert state.agent_state == AgentState.STOPPED assert state.get_last_error() == 'You messed around with Jim' - assert len(list(event_stream.get_events())) == 5 + assert len(list(event_stream.get_events())) == 12 @pytest.mark.asyncio -async def test_run_controller_stop_with_stuck(mock_agent, mock_event_stream): +async def test_run_controller_stop_with_stuck(): config = AppConfig() file_store = get_file_store(config.file_store, config.file_store_path) event_stream = EventStream(sid='test', file_store=file_store) agent = MagicMock(spec=Agent) - # a random message to send to the runtime - event = CmdRunAction(command='ls') def agent_step_fn(state): print(f'agent_step_fn received state: {state}') - return event + return CmdRunAction(command='ls') agent.step = agent_step_fn agent.llm = MagicMock(spec=LLM) @@ -243,13 +224,11 @@ async def on_event(event: Event): and observation_dict['content'] == 'Non fatal error here to trigger loop' ) last_event = event_to_dict(events[-1]) - assert last_event['extras']['agent_state'] == 'error' + assert last_event['extras']['agent_state'] == 'stopped' assert last_event['observation'] == 'agent_state_changed' - # it will first become AgentState.ERROR, then become AgentState.STOPPED - # in side run_controller (since the while loop + sleep no longer loop) assert state.agent_state == AgentState.STOPPED - assert state.get_last_error() == 'Agent got stuck in a loop' + assert state.get_last_error() == 'Non fatal error here to trigger loop' @pytest.mark.asyncio @@ -316,7 +295,7 @@ async def test_step_max_iterations(mock_agent, mock_event_stream): assert controller.state.traffic_control_state == TrafficControlState.NORMAL await controller._step() assert controller.state.traffic_control_state == TrafficControlState.THROTTLING - assert controller.state.agent_state == AgentState.PAUSED + assert controller.state.agent_state == AgentState.ERROR await controller.close() @@ -356,7 +335,7 @@ async def test_step_max_budget(mock_agent, mock_event_stream): assert controller.state.traffic_control_state == TrafficControlState.NORMAL await controller._step() assert controller.state.traffic_control_state == TrafficControlState.THROTTLING - assert controller.state.agent_state == AgentState.PAUSED + assert controller.state.agent_state == AgentState.ERROR await controller.close() From a08879f3140ec0e2564d8e9098a7a7c3f6d1ce24 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 18:43:33 -0400 Subject: [PATCH 54/60] add stacklevel --- openhands/controller/agent_controller.py | 2 +- openhands/runtime/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 84105685225..a0b6f2c5a8c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -132,7 +132,7 @@ def log(self, level: str, message: str, extra: dict | None = None): message (str): The message to log. """ message = f'[Agent Controller {self.id}] {message}' - getattr(logger, level)(message, extra=extra) + getattr(logger, level)(message, extra=extra, stacklevel=2) def update_state_before_step(self): self.state.iteration += 1 diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index 3228b2272e7..2d8b02cd2dd 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -113,7 +113,7 @@ def close(self) -> None: def log(self, level: str, message: str) -> None: message = f'[runtime {self.sid}] {message}' - getattr(logger, level)(message) + getattr(logger, level)(message, stacklevel=2) def send_status_message(self, message_id: str): """Sends a status message if the callback function was provided.""" From 98e25d12d4021ad79110276629e4649d82f82dd1 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 18:43:40 -0400 Subject: [PATCH 55/60] add debug info --- openhands/runtime/impl/eventstream/eventstream_runtime.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index ddc47d0c66a..645be31ecf4 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -475,6 +475,7 @@ def run_action(self, action: Action) -> Observation: assert action.timeout is not None try: + print('SEND ACTION', event_to_dict(action)) response = send_request( self.session, 'POST', @@ -483,13 +484,19 @@ def run_action(self, action: Action) -> Observation: timeout=action.timeout, ) output = response.json() + print('GOT OUTPUT', output) obs = observation_from_dict(output) obs._cause = action.id # type: ignore[attr-defined] except requests.Timeout: + print('EXCEPTION TIMEOUT') self.log('error', 'No response received within the timeout period.') obs = ErrorObservation( f'Action execution timed out after {action.timeout} seconds.', ) + except Exception as e: + print('OTHER EXCEPTION', e) + raise e + self._refresh_logs() return obs From 0e874153c258f0a67eb35732bfd4ba205f26d4c1 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 18:46:51 -0400 Subject: [PATCH 56/60] fix timeout error --- openhands/runtime/utils/request.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openhands/runtime/utils/request.py b/openhands/runtime/utils/request.py index 3b654c7aca4..27b90072b9f 100644 --- a/openhands/runtime/utils/request.py +++ b/openhands/runtime/utils/request.py @@ -57,7 +57,8 @@ def send_request( timeout: int = 10, **kwargs: Any, ) -> requests.Response: - kwargs['timeout'] = timeout + # wait a few more seconds to get the timeout error from client side + kwargs['timeout'] = timeout + 5 response = session.request(method, url, **kwargs) response.raise_for_status() return response From 6cc2cac4b796d164ee01fe21a10ed8439e597b7c Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 18:47:00 -0400 Subject: [PATCH 57/60] Revert "add debug info" This reverts commit 98e25d12d4021ad79110276629e4649d82f82dd1. --- openhands/runtime/impl/eventstream/eventstream_runtime.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index 645be31ecf4..ddc47d0c66a 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -475,7 +475,6 @@ def run_action(self, action: Action) -> Observation: assert action.timeout is not None try: - print('SEND ACTION', event_to_dict(action)) response = send_request( self.session, 'POST', @@ -484,19 +483,13 @@ def run_action(self, action: Action) -> Observation: timeout=action.timeout, ) output = response.json() - print('GOT OUTPUT', output) obs = observation_from_dict(output) obs._cause = action.id # type: ignore[attr-defined] except requests.Timeout: - print('EXCEPTION TIMEOUT') self.log('error', 'No response received within the timeout period.') obs = ErrorObservation( f'Action execution timed out after {action.timeout} seconds.', ) - except Exception as e: - print('OTHER EXCEPTION', e) - raise e - self._refresh_logs() return obs From 4f84a5029634aae42f2991ca9a3816903888b567 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 19:16:44 -0400 Subject: [PATCH 58/60] add ids to ErrorObservation --- frontend/src/components/error-message.tsx | 21 +++++++++++---------- frontend/src/routes/_oh.app.tsx | 21 ++++++--------------- openhands/events/observation/error.py | 1 + 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx index 75165a9a60f..d2624864473 100644 --- a/frontend/src/components/error-message.tsx +++ b/frontend/src/components/error-message.tsx @@ -24,16 +24,17 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) {
{headline &&

{headline}

} - + {headline && ( + + )} {showDetails &&

{details}

}
diff --git a/frontend/src/routes/_oh.app.tsx b/frontend/src/routes/_oh.app.tsx index cb1ef3b9c7e..7a4aaa70cc9 100644 --- a/frontend/src/routes/_oh.app.tsx +++ b/frontend/src/routes/_oh.app.tsx @@ -185,20 +185,6 @@ function App() { if (q) addIntialQueryToChat(q, files); }, [settings]); - const handleError = (message: string) => { - const [error, ...rest] = message.split(":"); - const details = rest.join(":"); - if (!details) { - dispatch( - addErrorMessage({ - message: error, - }), - ); - } else { - dispatch(addErrorMessage({ message: details })); - } - }; - const handleMessage = React.useCallback( (message: MessageEvent) => { // set token received from the server @@ -224,7 +210,12 @@ function App() { return; } if (isErrorObservation(parsed)) { - handleError(parsed.message); + dispatch( + addErrorMessage({ + id: parsed.extras?.error_id, + message: parsed.message, + }), + ); return; } diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py index c6139e86c0b..4ed05b89ac7 100644 --- a/openhands/events/observation/error.py +++ b/openhands/events/observation/error.py @@ -13,6 +13,7 @@ class ErrorObservation(Observation): """ observation: str = ObservationType.ERROR + error_id: str = '' @property def message(self) -> str: From 88a0eb4d776435ec2250b9de87419594e6a09a52 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 19:19:27 -0400 Subject: [PATCH 59/60] add some error_ids --- frontend/src/i18n/translation.json | 6 ++++++ openhands/runtime/impl/eventstream/eventstream_runtime.py | 3 +++ openhands/runtime/impl/remote/remote_runtime.py | 3 +++ 3 files changed, 12 insertions(+) diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json index ab73c4f86e2..6db2520b6b8 100644 --- a/frontend/src/i18n/translation.json +++ b/frontend/src/i18n/translation.json @@ -1522,5 +1522,11 @@ }, "STATUS$ERROR_RUNTIME_DISCONNECTED": { "en": "There was an error while connecting to the runtime. Please refresh the page." + }, + "AGENT_ERROR$BAD_ACTION": { + "en": "Agent tried to execute a malformed action." + }, + "AGENT_ERROR$ACTION_TIMEOUT": { + "en": "Action timed out." } } diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index ddc47d0c66a..a04fb1381d1 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -457,10 +457,12 @@ def run_action(self, action: Action) -> Observation: if action_type not in ACTION_TYPE_TO_CLASS: return ErrorObservation( f'Action {action_type} does not exist.', + error_id='AGENT_ERROR$BAD_ACTION', ) if not hasattr(self, action_type): return ErrorObservation( f'Action {action_type} is not supported in the current runtime.', + error_id='AGENT_ERROR$BAD_ACTION', ) if ( getattr(action, 'confirmation_state', None) @@ -489,6 +491,7 @@ def run_action(self, action: Action) -> Observation: self.log('error', 'No response received within the timeout period.') obs = ErrorObservation( f'Action execution timed out after {action.timeout} seconds.', + error_id='AGENT_ERROR$ACTION_TIMEOUT', ) self._refresh_logs() return obs diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index b47f48fb991..d290ba55c65 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -327,10 +327,12 @@ def run_action(self, action: Action) -> Observation: if action_type not in ACTION_TYPE_TO_CLASS: return ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.', + error_id='AGENT_ERROR$BAD_ACTION', ) if not hasattr(self, action_type): return ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.', + error_id='AGENT_ERROR$BAD_ACTION', ) assert action.timeout is not None @@ -350,6 +352,7 @@ def run_action(self, action: Action) -> Observation: except requests.Timeout: obs = ErrorObservation( f'[Runtime (ID={self.runtime_id})] Action execution timed out', + error_id='AGENT_ERROR$ACTION_TIMEOUT', ) return obs return obs From 2128d0bdfccf33618ad4b47cc8cfd643fa76e457 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Thu, 31 Oct 2024 19:31:41 -0400 Subject: [PATCH 60/60] fix type error --- frontend/src/components/error-message.tsx | 6 +++--- frontend/src/types/core/observations.ts | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/error-message.tsx b/frontend/src/components/error-message.tsx index d2624864473..86454539f77 100644 --- a/frontend/src/components/error-message.tsx +++ b/frontend/src/components/error-message.tsx @@ -30,9 +30,9 @@ export function ErrorMessage({ id, message }: ErrorMessageProps) { onClick={() => setShowDetails(!showDetails)} className="cursor-pointer text-left" > - {showDetails - ? t("ERROR_MESSAGE$HIDE_DETAILS") - : t("ERROR_MESSAGE$SHOW_DETAILS")} + {showDetails + ? t("ERROR_MESSAGE$HIDE_DETAILS") + : t("ERROR_MESSAGE$SHOW_DETAILS")} )} {showDetails &&

{details}

} diff --git a/frontend/src/types/core/observations.ts b/frontend/src/types/core/observations.ts index 9de2a70e8b1..21bafddf21d 100644 --- a/frontend/src/types/core/observations.ts +++ b/frontend/src/types/core/observations.ts @@ -54,6 +54,9 @@ export interface BrowseObservation extends OpenHandsObservationEvent<"browse"> { export interface ErrorObservation extends OpenHandsObservationEvent<"error"> { source: "user"; + extras: { + error_id?: string; + }; } export type OpenHandsObservation =