diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml index 6906538a6a8..25d05b9a0ca 100644 --- a/.github/workflows/ghcr-build.yml +++ b/.github/workflows/ghcr-build.yml @@ -401,7 +401,7 @@ jobs: exit 1 update_pr_description: name: Update PR Description - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork needs: [ghcr_build_runtime] runs-on: ubuntu-latest steps: diff --git a/.gitignore b/.gitignore index a4bc03c4eeb..0cc7d149d78 100644 --- a/.gitignore +++ b/.gitignore @@ -174,6 +174,7 @@ evaluation/bird/data evaluation/gaia/data evaluation/gorilla/data evaluation/toolqa/data +evaluation/scienceagentbench/benchmark # frontend diff --git a/evaluation/scienceagentbench/Dockerfile b/evaluation/scienceagentbench/Dockerfile new file mode 100644 index 00000000000..70ed92cc4dc --- /dev/null +++ b/evaluation/scienceagentbench/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.11-bookworm + + +# For OpenHands agents to explore the dataset directories, please download the full benchmark [here](https://buckeyemailosu-my.sharepoint.com/:u:/g/personal/chen_8336_buckeyemail_osu_edu/EQuA6uJ3CtRHvRfZ2GiN1tYBRVJE4DSUD10MW61fr7HuSQ?e=sCBegG) and unzip it with password `scienceagentbench`. +# **Please DO NOT redistribute the unzipped data files online.** +# It will download a benchmark.zip file to the current directory. +# unzip it and put the benchmark folder under evaluation/scienceagentbench/ + +RUN mkdir -p /benchmark +COPY benchmark /benchmark + +RUN mkdir -p /workspace +WORKDIR /workspace + +# pushd evaluation/scienceagentbench +# docker build -t xingyaoww/openhands-eval-scienceagentbench . +# popd diff --git a/evaluation/scienceagentbench/Dockerfile.evaluator b/evaluation/scienceagentbench/Dockerfile.evaluator new file mode 100644 index 00000000000..f8263e1bb0a --- /dev/null +++ b/evaluation/scienceagentbench/Dockerfile.evaluator @@ -0,0 +1,25 @@ +FROM mambaorg/micromamba:debian12 + +USER root +# For https://github.com/OSU-NLP-Group/ScienceAgentBench/tree/main?tab=readme-ov-file#code-generation-with-agents + +RUN micromamba create -n sci-agent-eval python=3.10 pip setuptools wheel +RUN micromamba run -n sci-agent-eval pip install pip-tools + +RUN mkdir -p /workspace +WORKDIR /workspace + +RUN apt-get update && apt-get install -y git + +RUN git clone https://github.com/OSU-NLP-Group/ScienceAgentBench.git /workspace/ +RUN git checkout 4eddc7db6449a5ade3e37285747c8b208cd54ce7 + +RUN micromamba create -n sci-agent python=3.10 pip setuptools wheel +RUN micromamba run -n sci-agent pip install -r requirements.txt + +# Replace all occurence of conda with micromamba under the /workspace +RUN find ./ -type f -exec sed -i 's/conda/micromamba/g' {} \; + +# pushd evaluation/scienceagentbench +# docker build -t xingyaoww/openhands-eval-scienceagentbench-evaluator -f Dockerfile.evaluator . +# popd diff --git a/evaluation/scienceagentbench/README.md b/evaluation/scienceagentbench/README.md new file mode 100644 index 00000000000..3182c2e117b --- /dev/null +++ b/evaluation/scienceagentbench/README.md @@ -0,0 +1,54 @@ +# ScienceAgentBench Evaluation with OpenHands + +This folder contains the evaluation harness for [ScienceAgentBench](https://osu-nlp-group.github.io/ScienceAgentBench/) (paper: https://arxiv.org/abs/2410.05080). + +## Setup Environment and LLM Configuration + +Please follow instruction [here](../README.md#setup) to setup your local development environment and LLM. 
+
+## Setup ScienceAgentBench
+
+To prevent benchmark data contamination, we only provide the annotation sheet on [Huggingface](https://huggingface.co/datasets/osunlp/ScienceAgentBench), which includes all necessary *inputs* for running an agent.
+
+## Run Inference on ScienceAgentBench
+
+```bash
+./evaluation/scienceagentbench/scripts/run_infer.sh [model_config] [git-version] [use_knowledge] [agent] [eval_limit] [num_workers]
+
+# Example
+./evaluation/scienceagentbench/scripts/run_infer.sh llm.eval_gpt4o 0.9.3
+```
+
+where `model_config` is mandatory, and the rest are optional.
+
+- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
+LLM settings, as defined in your `config.toml`.
+- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
+like to evaluate. It could also be a release tag like `0.6.2`.
+- `use_knowledge`, e.g. `true`, specifies whether the agent is allowed to use expert-provided knowledge as additional input. By default, it is set to `false`.
+- `agent`, e.g. `CodeActAgent`, is the name of the agent to evaluate, defaulting
+to `CodeActAgent`.
+- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
+default, the script evaluates the entire ScienceAgentBench validation set. Note:
+in order to use `eval_limit`, you must also set `agent`.
+- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
+default, it is set to 1.
+
+The maximum number of agent iterations is fixed at 30 by the wrapper script (`--max-iterations 30` in `scripts/run_infer.sh`); edit the script if you need a different value.
+
+## Evaluate Generated Programs
+
+### Extract Necessary Information from the OpenHands Log
+
+After inference is completed, use the following command to extract the information needed for evaluation from the output log:
+
+```bash
+python evaluation/scienceagentbench/post_proc.py [log_fname]
+```
+
+- `log_fname`, e.g. `evaluation/.../output.jsonl`, is the automatically saved trajectory log of an OpenHands agent.
+
+The converted output will be written to, e.g., `evaluation/.../output.converted.jsonl`.
+
+### Run Evaluation
+
+Please follow the steps [here](https://github.com/OSU-NLP-Group/ScienceAgentBench/tree/main?tab=readme-ov-file#evaluation-of-generated-code) to evaluate the generated programs.
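+
+Before running the official evaluator, you may want to sanity-check the converted log. The snippet below is an illustrative sketch only (the path is an example placeholder; the field names follow what `post_proc.py` writes):
+
+```python
+import json
+
+# Example path -- substitute the actual output directory of your run.
+converted_path = 'evaluation/.../output.converted.jsonl'
+
+with open(converted_path, encoding='utf-8') as f:
+    records = [json.loads(line) for line in f]
+
+# `cost` is the accumulated LLM cost and `test_result['program']` is the extracted
+# program text ('ERROR' if no program was saved), as written by post_proc.py.
+total_cost = sum(r['cost'] for r in records)
+missing = sum(1 for r in records if r['test_result'].get('program') == 'ERROR')
+print(f'{len(records)} instances, total cost ${total_cost:.2f}, {missing} missing programs')
+```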
diff --git a/evaluation/scienceagentbench/post_proc.py b/evaluation/scienceagentbench/post_proc.py new file mode 100644 index 00000000000..46cfbe2b2a7 --- /dev/null +++ b/evaluation/scienceagentbench/post_proc.py @@ -0,0 +1,30 @@ +import json +from argparse import ArgumentParser + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument( + 'log_fname', + type=str, + ) + args = parser.parse_args() + + fname = args.log_fname + out_fname = args.log_fname.replace('.jsonl', '.converted.jsonl') + + log = [json.loads(line) for line in open(fname)] + + simple_log = [ + json.dumps( + { + 'instance_id': ex['instance_id'], + 'instruction': ex['instruction'], + 'test_result': ex['test_result'], + 'cost': ex['metrics']['accumulated_cost'], + } + ) + for ex in log + ] + + with open(out_fname, 'w+', encoding='utf-8') as f: + f.write('\n'.join(simple_log)) diff --git a/evaluation/scienceagentbench/run_infer.py b/evaluation/scienceagentbench/run_infer.py new file mode 100644 index 00000000000..52e42f29f12 --- /dev/null +++ b/evaluation/scienceagentbench/run_infer.py @@ -0,0 +1,292 @@ +import asyncio +import os +from typing import Any + +import pandas as pd +from datasets import load_dataset +from tqdm import tqdm + +from evaluation.utils.shared import ( + EvalMetadata, + EvalOutput, + codeact_user_response, + make_metadata, + prepare_dataset, + reset_logger_for_multiprocessing, + run_evaluation, +) +from openhands.controller.state.state import State +from openhands.core.config import ( + AppConfig, + SandboxConfig, + get_llm_config_arg, + get_parser, +) +from openhands.core.logger import openhands_logger as logger +from openhands.core.main import create_runtime, run_controller +from openhands.events.action import CmdRunAction, MessageAction +from openhands.events.observation import CmdOutputObservation +from openhands.runtime.base import Runtime +from openhands.utils.async_utils import call_async_from_sync + +AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { + 'CodeActAgent': codeact_user_response, +} + +LOCAL_DATASET_PATH = os.path.join(os.path.dirname(__file__), 'benchmark') + + +def format_task_dict(example, use_knowledge): + task = { + 'instance_id': example['instance_id'], + 'task_inst': example['task_inst'], + 'dataset_path': '/benchmark/datasets/' + + example['dataset_folder_tree'].split('\n')[0][4:], + 'dataset_folder_tree': example['dataset_folder_tree'], + 'dataset_preview': example['dataset_preview'], + 'pred_program_name': 'pred_' + example['gold_program_name'], + } + + if use_knowledge: + task['task_inst'] += '\n' + str(example['domain_knowledge']) + + return task + + +def get_config( + metadata: EvalMetadata, + instance_id: str, +) -> AppConfig: + config = AppConfig( + default_agent=metadata.agent_class, + run_as_openhands=False, + runtime=os.environ.get('RUNTIME', 'eventstream'), + max_budget_per_task=4, + max_iterations=metadata.max_iterations, + sandbox=SandboxConfig( + base_container_image='docker.io/xingyaoww/openhands-eval-scienceagentbench', + enable_auto_lint=True, + use_host_network=False, + timeout=300, + api_key=os.environ.get('ALLHANDS_API_KEY', None), + remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), + keep_remote_runtime_alive=False, + ), + # do not mount workspace + workspace_base=None, + workspace_mount_path=None, + ) + config.set_llm_config(metadata.llm_config) + if metadata.llm_config.log_completions: + metadata.llm_config.log_completions_folder = os.path.join( + metadata.eval_output_dir, 'llm_completions', instance_id + ) + logger.info( + 
f'Logging LLM completions for instance {instance_id} to ' + f'{metadata.llm_config.log_completions_folder}' + ) + return config + + +def initialize_runtime( + runtime: Runtime, + instance: pd.Series, # this argument is not required +): + """Initialize the runtime for the agent. + + This function is called before the runtime is used to run the agent. + """ + logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}") + obs: CmdOutputObservation + + # Set up workspace directories + action = CmdRunAction(command='mkdir -p /workspace/pred_programs') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert obs.exit_code == 0 + + action = CmdRunAction(command='mkdir -p /workspace/pred_results') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert obs.exit_code == 0 + + dataset_name = instance['dataset_folder_tree'].split('\n')[0][4:].rstrip('/') + + # Copy the dataset to the workspace + dataset_dir = os.path.join( + LOCAL_DATASET_PATH, + 'datasets', + dataset_name, + ) + runtime.copy_to(dataset_dir, '/workspace/benchmark/datasets', recursive=True) + + # Check the dataset exists + action = CmdRunAction( + command='cd /workspace/benchmark/datasets && ls', + keep_prompt=False, + ) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + assert dataset_name in obs.content + + logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}") + + +def complete_runtime( + runtime: Runtime, + instance: pd.Series, +) -> dict[str, Any]: + """Complete the runtime for the agent. + + This function is called before the runtime is used to run the agent. + If you need to do something in the sandbox to get the correctness metric after + the agent has run, modify this function. + """ + logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}") + obs: CmdOutputObservation + + test_result = {} + + action = CmdRunAction(command='cd /workspace') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + + assert obs.exit_code == 0 + + action = CmdRunAction( + command=f'cat pred_programs/{instance.pred_program_name}', + keep_prompt=False, + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + + if obs.exit_code == 0: + test_result = {'program': obs.content} + else: + test_result = {'program': 'ERROR'} + + logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}") + return test_result + + +def process_instance( + instance: pd.Series, + metadata: EvalMetadata, + reset_logger: bool = True, +) -> EvalOutput: + instance_id = instance.instance_id.replace('/', '__') + config = get_config(metadata, instance_id) + + # Set up the logger properly, so you can run multi-processing to parallelize the evaluation + if reset_logger: + log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs') + reset_logger_for_multiprocessing(logger, instance_id, log_dir) + else: + logger.info(f'Starting evaluation for instance {instance_id}.') + + instruction = f"""You are an expert Python programming assistant that helps scientist users to write high-quality code to solve their tasks. +Given a user request, you are expected to write a complete program that accomplishes the requested task and save any outputs to `/workspace/pred_results/` in the correct format. + +Here's the user request you need to work on: +{instance.task_inst} + +You can access the dataset at `{instance.dataset_path}`. 
Here is the directory structure of the dataset: +``` +{instance.dataset_folder_tree} +``` +Here are some helpful previews for the dataset file(s): +{instance.dataset_preview} + +Please save your program as `/workspace/pred_programs/{instance.pred_program_name}`. +Then, please run the program to check and fix any errors. +Please do NOT run the program in the background. +If the program uses some packages that are incompatible, please figure out alternative implementations and do NOT restart the environment. + +""" + + runtime = create_runtime(config) + call_async_from_sync(runtime.connect) + initialize_runtime(runtime, instance) + + # Here's how you can run the agent (similar to the `main` function) and get the final task state + state: State | None = asyncio.run( + run_controller( + config=config, + initial_user_action=MessageAction(content=instruction), + runtime=runtime, + fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get( + metadata.agent_class + ), + ) + ) + + # ======= Attempt to evaluate the agent's edits ======= + test_result = complete_runtime(runtime, instance) + + # If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction) + # You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation. + if state is None: + raise ValueError('State should not be None.') + metrics = state.metrics.get() if state.metrics else None + + # history is now available as a stream of events, rather than list of pairs of (Action, Observation) + # for compatibility with the existing output format, we can remake the pairs here + # remove when it becomes unnecessary + histories = state.history.compatibility_for_eval_history_pairs() + + # Save the output + output = EvalOutput( + instance_id=instance.instance_id, + instruction=instruction, + metadata=metadata, + history=histories, + metrics=metrics, + error=state.last_error if state and state.last_error else None, + test_result=test_result, + ) + return output + + +if __name__ == '__main__': + parser = get_parser() + parser.add_argument( + '--use_knowledge', + type=str, + default='false', + choices=['true', 'false'], + help='use expert-provided knowledge or not', + ) + args, _ = parser.parse_known_args() + + sab_dataset = load_dataset('osunlp/ScienceAgentBench', split='validation') + + dataset_processed = [] + for example in tqdm(sab_dataset): + dataset_processed.append( + format_task_dict(example, args.use_knowledge == 'true') + ) + + dataset = pd.DataFrame(dataset_processed) + + llm_config = None + if args.llm_config: + llm_config = get_llm_config_arg(args.llm_config) + if llm_config is None: + raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}') + + metadata = make_metadata( + llm_config, + 'ScienceAgentBench', + args.agent_cls, + args.max_iterations, + args.eval_note, + args.eval_output_dir, + ) + output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl') + dataset['instance_id'] = dataset['instance_id'].apply(str) + instances = prepare_dataset(dataset, output_file, args.eval_n_limit) + + run_evaluation( + instances, metadata, output_file, args.eval_num_workers, process_instance + ) diff --git a/evaluation/scienceagentbench/scripts/run_infer.sh b/evaluation/scienceagentbench/scripts/run_infer.sh new file mode 100755 index 00000000000..7667e572378 --- /dev/null +++ b/evaluation/scienceagentbench/scripts/run_infer.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -eo pipefail + +source "evaluation/utils/version_control.sh" + 
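+# Positional arguments (only model_config is required); see evaluation/scienceagentbench/README.md:
+#   $1 model_config, $2 git-version, $3 use_knowledge, $4 agent, $5 eval_limit, $6 num_workers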
+MODEL_CONFIG=$1 +COMMIT_HASH=$2 +USE_KNOWLEDGE=$3 +AGENT=$4 +EVAL_LIMIT=$5 +NUM_WORKERS=$6 + +if [ -z "$NUM_WORKERS" ]; then + NUM_WORKERS=1 + echo "Number of workers not specified, use default $NUM_WORKERS" +fi +checkout_eval_branch + +if [ -z "$AGENT" ]; then + echo "Agent not specified, use default CodeActAgent" + AGENT="CodeActAgent" +fi + +if [ -z "$USE_KNOWLEDGE" ]; then + echo "Use knowledge not specified, use default False" + USE_KNOWLEDGE=false +fi + +get_agent_version + +echo "AGENT: $AGENT" +echo "AGENT_VERSION: $AGENT_VERSION" +echo "MODEL_CONFIG: $MODEL_CONFIG" + +COMMAND="poetry run python evaluation/scienceagentbench/run_infer.py \ + --agent-cls $AGENT \ + --llm-config $MODEL_CONFIG \ + --use_knowledge $USE_KNOWLEDGE \ + --max-iterations 30 \ + --eval-num-workers $NUM_WORKERS \ + --eval-note $AGENT_VERSION" \ + +if [ -n "$EVAL_LIMIT" ]; then + echo "EVAL_LIMIT: $EVAL_LIMIT" + COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT" +fi + +# Run the command +eval $COMMAND diff --git a/frontend/__tests__/components/feedback-form.test.tsx b/frontend/__tests__/components/feedback-form.test.tsx index b41fa7e5c77..28684401e2c 100644 --- a/frontend/__tests__/components/feedback-form.test.tsx +++ b/frontend/__tests__/components/feedback-form.test.tsx @@ -5,7 +5,6 @@ import { FeedbackForm } from "#/components/feedback-form"; describe("FeedbackForm", () => { const user = userEvent.setup(); - const onSubmitMock = vi.fn(); const onCloseMock = vi.fn(); afterEach(() => { @@ -13,7 +12,7 @@ describe("FeedbackForm", () => { }); it("should render correctly", () => { - render(); + render(); screen.getByLabelText("Email"); screen.getByLabelText("Private"); @@ -24,7 +23,7 @@ describe("FeedbackForm", () => { }); it("should switch between private and public permissions", async () => { - render(); + render(); const privateRadio = screen.getByLabelText("Private"); const publicRadio = screen.getByLabelText("Public"); @@ -40,69 +39,11 @@ describe("FeedbackForm", () => { expect(publicRadio).not.toBeChecked(); }); - it("should call onSubmit when the form is submitted", async () => { - render(); - const email = screen.getByLabelText("Email"); - - await user.type(email, "test@test.test"); - await user.click(screen.getByRole("button", { name: "Submit" })); - - expect(onSubmitMock).toHaveBeenCalledWith("private", "test@test.test"); // private is the default value - }); - - it("should not call onSubmit when the email is invalid", async () => { - render(); - const email = screen.getByLabelText("Email"); - const submitButton = screen.getByRole("button", { name: "Submit" }); - - await user.click(submitButton); - - expect(onSubmitMock).not.toHaveBeenCalled(); - - await user.type(email, "test"); - await user.click(submitButton); - - expect(onSubmitMock).not.toHaveBeenCalled(); - }); - - it("should submit public permissions when the public radio is checked", async () => { - render(); - const email = screen.getByLabelText("Email"); - const publicRadio = screen.getByLabelText("Public"); - - await user.type(email, "test@test.test"); - await user.click(publicRadio); - await user.click(screen.getByRole("button", { name: "Submit" })); - - expect(onSubmitMock).toHaveBeenCalledWith("public", "test@test.test"); - }); - it("should call onClose when the close button is clicked", async () => { - render(); + render(); await user.click(screen.getByRole("button", { name: "Cancel" })); - expect(onSubmitMock).not.toHaveBeenCalled(); expect(onCloseMock).toHaveBeenCalled(); }); - it("should disable the buttons if isSubmitting is true", () => 
{ - const { rerender } = render( - , - ); - const submitButton = screen.getByRole("button", { name: "Submit" }); - const cancelButton = screen.getByRole("button", { name: "Cancel" }); - - expect(submitButton).not.toBeDisabled(); - expect(cancelButton).not.toBeDisabled(); - - rerender( - , - ); - expect(submitButton).toBeDisabled(); - expect(cancelButton).toBeDisabled(); - }); }); diff --git a/frontend/src/api/open-hands.types.ts b/frontend/src/api/open-hands.types.ts index ba0e8642bc7..9da1a339b4d 100644 --- a/frontend/src/api/open-hands.types.ts +++ b/frontend/src/api/open-hands.types.ts @@ -31,7 +31,7 @@ export interface Feedback { version: string; email: string; token: string; - feedback: "positive" | "negative"; + polarity: "positive" | "negative"; permissions: "public" | "private"; trajectory: unknown[]; } diff --git a/frontend/src/components/chat-interface.tsx b/frontend/src/components/chat-interface.tsx index 10786c13923..25a57073698 100644 --- a/frontend/src/components/chat-interface.tsx +++ b/frontend/src/components/chat-interface.tsx @@ -1,6 +1,5 @@ import { useDispatch, useSelector } from "react-redux"; import React from "react"; -import { useFetcher } from "@remix-run/react"; import { useSocket } from "#/context/socket"; import { convertImageToBase64 } from "#/utils/convert-image-to-base-64"; import { ChatMessage } from "./chat-message"; @@ -13,10 +12,6 @@ import { RootState } from "#/store"; import AgentState from "#/types/AgentState"; import { generateAgentStateChangeEvent } from "#/services/agentStateService"; import { FeedbackModal } from "./feedback-modal"; -import { Feedback } from "#/api/open-hands.types"; -import { getToken } from "#/services/auth"; -import { removeApiKey, removeUnwantedKeys } from "#/utils/utils"; -import { clientAction } from "#/routes/submit-feedback"; import { useScrollToBottom } from "#/hooks/useScrollToBottom"; import TypingIndicator from "./chat/TypingIndicator"; import ConfirmationButtons from "./chat/ConfirmationButtons"; @@ -24,16 +19,13 @@ import { ErrorMessage } from "./error-message"; import { ContinueButton } from "./continue-button"; import { ScrollToBottomButton } from "./scroll-to-bottom-button"; -const FEEDBACK_VERSION = "1.0"; - const isErrorMessage = ( message: Message | ErrorMessage, ): message is ErrorMessage => "error" in message; export function ChatInterface() { - const { send, events } = useSocket(); + const { send } = useSocket(); const dispatch = useDispatch(); - const fetcher = useFetcher({ key: "feedback" }); const scrollRef = React.useRef(null); const { scrollDomToBottom, onChatBodyScroll, hitBottom } = useScrollToBottom(scrollRef); @@ -44,7 +36,6 @@ export function ChatInterface() { const [feedbackPolarity, setFeedbackPolarity] = React.useState< "positive" | "negative" >("positive"); - const [feedbackShared, setFeedbackShared] = React.useState(0); const [feedbackModalIsOpen, setFeedbackModalIsOpen] = React.useState(false); const handleSendMessage = async (content: string, files: File[]) => { @@ -71,30 +62,6 @@ export function ChatInterface() { setFeedbackPolarity(polarity); }; - const handleSubmitFeedback = ( - permissions: "private" | "public", - email: string, - ) => { - const feedback: Feedback = { - version: FEEDBACK_VERSION, - feedback: feedbackPolarity, - email, - permissions, - token: getToken(), - trajectory: removeApiKey(removeUnwantedKeys(events)), - }; - - const formData = new FormData(); - formData.append("feedback", JSON.stringify(feedback)); - - fetcher.submit(formData, { - action: "/submit-feedback", - 
method: "POST", - }); - - setFeedbackShared(messages.length); - }; - return (
- {feedbackShared !== messages.length && messages.length > 3 && ( - - onClickShareFeedbackActionButton("positive") - } - onNegativeFeedback={() => - onClickShareFeedbackActionButton("negative") - } - /> - )} + + onClickShareFeedbackActionButton("positive") + } + onNegativeFeedback={() => + onClickShareFeedbackActionButton("negative") + } + />
{messages.length > 2 && curAgentState === AgentState.AWAITING_USER_INPUT && ( @@ -163,9 +128,8 @@ export function ChatInterface() { setFeedbackModalIsOpen(false)} - onSubmit={handleSubmitFeedback} + polarity={feedbackPolarity} />
); diff --git a/frontend/src/components/feedback-form.tsx b/frontend/src/components/feedback-form.tsx index 7ff03a44307..4e1ddde6354 100644 --- a/frontend/src/components/feedback-form.tsx +++ b/frontend/src/components/feedback-form.tsx @@ -1,27 +1,87 @@ +import React from "react"; +import hotToast from "react-hot-toast"; import ModalButton from "./buttons/ModalButton"; +import { request } from "#/services/api"; +import { Feedback } from "#/api/open-hands.types"; + +const FEEDBACK_VERSION = "1.0"; +const VIEWER_PAGE = "https://www.all-hands.dev/share"; interface FeedbackFormProps { - onSubmit: (permissions: "private" | "public", email: string) => void; onClose: () => void; - isSubmitting?: boolean; + polarity: "positive" | "negative"; } -export function FeedbackForm({ - onSubmit, - onClose, - isSubmitting, -}: FeedbackFormProps) { - const handleSubmit = (event: React.FormEvent) => { +export function FeedbackForm({ onClose, polarity }: FeedbackFormProps) { + const [isSubmitting, setIsSubmitting] = React.useState(false); + + const copiedToClipboardToast = () => { + hotToast("Password copied to clipboard", { + icon: "📋", + position: "bottom-right", + }); + }; + + const onPressToast = (password: string) => { + navigator.clipboard.writeText(password); + copiedToClipboardToast(); + }; + + const shareFeedbackToast = ( + message: string, + link: string, + password: string, + ) => { + hotToast( +
+ {message} + onPressToast(password)} + href={link} + target="_blank" + rel="noreferrer" + > + Go to shared feedback + + onPressToast(password)} className="cursor-pointer"> + Password: {password} (copy) + +
, + { duration: 10000 }, + ); + }; + + const handleSubmit = async (event: React.FormEvent) => { event?.preventDefault(); const formData = new FormData(event.currentTarget); + setIsSubmitting(true); + + const email = formData.get("email")?.toString() || ""; + const permissions = (formData.get("permissions")?.toString() || + "private") as "private" | "public"; - const email = formData.get("email")?.toString(); - const permissions = formData.get("permissions")?.toString() as - | "private" - | "public" - | undefined; + const feedback: Feedback = { + version: FEEDBACK_VERSION, + email, + polarity, + permissions, + trajectory: [], + token: "", + }; - if (email) onSubmit(permissions || "private", email); + const response = await request("/api/submit-feedback", { + method: "POST", + body: JSON.stringify(feedback), + headers: { + "Content-Type": "application/json", + }, + }); + const { message, feedback_id, password } = response.body; // eslint-disable-line + const link = `${VIEWER_PAGE}?share_id=${feedback_id}`; + shareFeedbackToast(message, link, password); + setIsSubmitting(false); }; return ( diff --git a/frontend/src/components/feedback-modal.tsx b/frontend/src/components/feedback-modal.tsx index f9cf05f0789..96663135e89 100644 --- a/frontend/src/components/feedback-modal.tsx +++ b/frontend/src/components/feedback-modal.tsx @@ -1,6 +1,4 @@ import React from "react"; -import hotToast, { toast } from "react-hot-toast"; -import { useFetcher } from "@remix-run/react"; import { FeedbackForm } from "./feedback-form"; import { BaseModalTitle, @@ -8,82 +6,18 @@ import { } from "./modals/confirmation-modals/BaseModal"; import { ModalBackdrop } from "./modals/modal-backdrop"; import ModalBody from "./modals/ModalBody"; -import { clientAction } from "#/routes/submit-feedback"; interface FeedbackModalProps { - onSubmit: (permissions: "private" | "public", email: string) => void; onClose: () => void; isOpen: boolean; - isSubmitting?: boolean; + polarity: "positive" | "negative"; } export function FeedbackModal({ - onSubmit, onClose, isOpen, - isSubmitting, + polarity, }: FeedbackModalProps) { - const fetcher = useFetcher({ key: "feedback" }); - const isInitialRender = React.useRef(true); - - const copiedToClipboardToast = () => { - hotToast("Password copied to clipboard", { - icon: "📋", - position: "bottom-right", - }); - }; - - const onPressToast = (password: string) => { - navigator.clipboard.writeText(password); - copiedToClipboardToast(); - }; - - const shareFeedbackToast = ( - message: string, - link: string, - password: string, - ) => { - hotToast( -
- {message} - onPressToast(password)} - href={link} - target="_blank" - rel="noreferrer" - > - Go to shared feedback - - onPressToast(password)} className="cursor-pointer"> - Password: {password} (copy) - -
, - { duration: 10000 }, - ); - }; - - React.useEffect(() => { - if (isInitialRender.current) { - isInitialRender.current = false; - return; - } - - // Handle feedback submission - if (fetcher.state === "idle" && fetcher.data) { - if (!fetcher.data.success) { - toast.error("Error submitting feedback"); - } else if (fetcher.data.data) { - const { data } = fetcher.data; - const { message, link, password } = data; - shareFeedbackToast(message, link, password); - } - - onClose(); - } - }, [fetcher.state, fetcher.data?.success]); - if (!isOpen) return null; return ( @@ -91,11 +25,7 @@ export function FeedbackModal({ - + ); diff --git a/frontend/src/routes/submit-feedback.ts b/frontend/src/routes/submit-feedback.ts deleted file mode 100644 index 19281eea7ad..00000000000 --- a/frontend/src/routes/submit-feedback.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { ClientActionFunctionArgs, json } from "@remix-run/react"; -import { Feedback } from "#/api/open-hands.types"; -import OpenHands from "#/api/open-hands"; - -const VIEWER_PAGE = "https://www.all-hands.dev/share"; - -const isFeedback = (feedback: unknown): feedback is Feedback => { - if (typeof feedback !== "object" || feedback === null) { - return false; - } - - return ( - "version" in feedback && - "email" in feedback && - "token" in feedback && - "feedback" in feedback && - "permissions" in feedback && - "trajectory" in feedback - ); -}; - -export const clientAction = async ({ request }: ClientActionFunctionArgs) => { - const formData = await request.formData(); - const feedback = formData.get("feedback")?.toString(); - const token = localStorage.getItem("token"); - - if (token && feedback) { - const parsed = JSON.parse(feedback); - if (isFeedback(parsed)) { - try { - const response = await OpenHands.sendFeedback(token, parsed); - if (response.statusCode === 200) { - const { message, feedback_id: feedbackId, password } = response.body; - const link = `${VIEWER_PAGE}?share_id=${feedbackId}`; - return json({ - success: true, - data: { message, link, password }, - }); - } - } catch (error) { - return json({ success: false, data: null }); - } - } - } - - return json({ success: false, data: null }); -}; diff --git a/openhands/events/stream.py b/openhands/events/stream.py index 1e4c3b9d539..aafbcc2fc87 100644 --- a/openhands/events/stream.py +++ b/openhands/events/stream.py @@ -71,7 +71,15 @@ def get_events( end_id=None, reverse=False, filter_out_type: tuple[type[Event], ...] 
| None = None, + filter_hidden=False, ) -> Iterable[Event]: + def should_filter(event: Event): + if filter_hidden and hasattr(event, 'hidden') and event.hidden: + return True + if filter_out_type is not None and isinstance(event, filter_out_type): + return True + return False + if reverse: if end_id is None: end_id = self._cur_id - 1 @@ -79,9 +87,7 @@ def get_events( while event_id >= start_id: try: event = self.get_event(event_id) - if filter_out_type is None or not isinstance( - event, filter_out_type - ): + if not should_filter(event): yield event except FileNotFoundError: logger.debug(f'No event found for ID {event_id}') @@ -93,9 +99,7 @@ def get_events( break try: event = self.get_event(event_id) - if filter_out_type is None or not isinstance( - event, filter_out_type - ): + if not should_filter(event): yield event except FileNotFoundError: break diff --git a/openhands/runtime/impl/e2b/sandbox.py b/openhands/runtime/impl/e2b/sandbox.py index 666dc43f701..d145dac3511 100644 --- a/openhands/runtime/impl/e2b/sandbox.py +++ b/openhands/runtime/impl/e2b/sandbox.py @@ -4,9 +4,7 @@ from glob import glob from e2b import Sandbox as E2BSandbox -from e2b.sandbox.exception import ( - TimeoutException, -) +from e2b.sandbox.exception import TimeoutException from openhands.core.config import SandboxConfig from openhands.core.logger import openhands_logger as logger diff --git a/openhands/server/data_models/feedback.py b/openhands/server/data_models/feedback.py index cbdd8744807..59f32008b52 100644 --- a/openhands/server/data_models/feedback.py +++ b/openhands/server/data_models/feedback.py @@ -1,5 +1,5 @@ import json -from typing import Any, Literal +from typing import Any, Literal, Optional import requests from pydantic import BaseModel @@ -10,10 +10,12 @@ class FeedbackDataModel(BaseModel): version: str email: str - token: str - feedback: Literal['positive', 'negative'] + polarity: Literal['positive', 'negative'] + feedback: Literal[ + 'positive', 'negative' + ] # TODO: remove this, its here for backward compatibility permissions: Literal['public', 'private'] - trajectory: list[dict[str, Any]] + trajectory: Optional[list[dict[str, Any]]] FEEDBACK_URL = 'https://share-od-trajectory-3u9bw9tx.uc.gateway.dev/share_od_trajectory' @@ -21,6 +23,7 @@ class FeedbackDataModel(BaseModel): def store_feedback(feedback: FeedbackDataModel) -> dict[str, str]: # Start logging + feedback.feedback = feedback.polarity display_feedback = feedback.model_dump() if 'trajectory' in display_feedback: display_feedback['trajectory'] = ( diff --git a/openhands/server/listen.py b/openhands/server/listen.py index fc740e80293..cc74d5ba736 100644 --- a/openhands/server/listen.py +++ b/openhands/server/listen.py @@ -634,14 +634,14 @@ async def upload_file(request: Request, files: list[UploadFile]): @app.post('/api/submit-feedback') -async def submit_feedback(request: Request, feedback: FeedbackDataModel): +async def submit_feedback(request: Request): """Submit user feedback. This function stores the provided feedback data. To submit feedback: ```sh - curl -X POST -F "email=test@example.com" -F "token=abc" -F "feedback=positive" -F "permissions=private" -F "trajectory={}" http://localhost:3000/api/submit-feedback + curl -X POST -d '{"email": "test@example.com"}' -H "Authorization:" ``` Args: @@ -656,6 +656,19 @@ async def submit_feedback(request: Request, feedback: FeedbackDataModel): """ # Assuming the storage service is already configured in the backend # and there is a function to handle the storage. 
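+    # The browser now sends only a small JSON payload (email, version, permissions, polarity).
+    # The trajectory is no longer supplied by the client: it is rebuilt here from the
+    # conversation's event stream, with hidden events excluded via filter_hidden=True.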
+ body = await request.json() + events = request.state.conversation.event_stream.get_events(filter_hidden=True) + trajectory = [] + for event in events: + trajectory.append(event_to_dict(event)) + feedback = FeedbackDataModel( + email=body.get('email', ''), + version=body.get('version', ''), + permissions=body.get('permissions', 'private'), + polarity=body.get('polarity', ''), + feedback=body.get('polarity', ''), + trajectory=trajectory, + ) try: feedback_data = store_feedback(feedback) return JSONResponse(status_code=200, content=feedback_data)