diff --git a/examples/experiment_eval.py b/examples/experiment_eval.py
index 82fe4bbd0..55e3db036 100644
--- a/examples/experiment_eval.py
+++ b/examples/experiment_eval.py
@@ -21,7 +21,7 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
@@ -164,7 +164,7 @@ def _iterate_env_agent_combo_not_in_db(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 model_names["env"],
                 EvaluationForTwoAgents[evaluation_dimensions],  # type: ignore
                 # TODO check how to do type annotation
diff --git a/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py b/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
index abe959294..46d299a13 100644
--- a/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
+++ b/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
@@ -1,11 +1,14 @@
 import logging
 import sys
+import json
 
 from rich.logging import RichHandler
 
 from aact import NodeFactory
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 from sotopia.experimental.agents.datamodels import Observation, AgentAction
+from sotopia.database.persistent_profile import AgentProfile
+from typing import Any
 
 from sotopia.generation_utils import agenerate
 from sotopia.generation_utils.generate import StrOutputParser
@@ -33,11 +36,13 @@ def __init__(
         input_channels: list[str],
         output_channel: str,
         query_interval: int,
-        agent_name: str,
         node_name: str,
-        goal: str,
         model_name: str,
-        redis_url: str,
+        goal: str,
+        agent_name: str = "",
+        background: dict[str, Any] | None = None,
+        agent_pk: str | None = None,
+        redis_url: str = "redis://localhost:6379/0",
     ):
         super().__init__(
             [(input_channel, Observation) for input_channel in input_channels],
@@ -47,11 +52,35 @@ def __init__(
         )
         self.output_channel = output_channel
         self.query_interval = query_interval
-        self.count_ticks = 0
+        self.count_ticks: int = 0
         self.message_history: list[Observation] = []
-        self.name = agent_name
-        self.model_name = model_name
-        self.goal = goal
+        self.goal: str = goal
+        self.model_name: str = model_name
+        self.agent_profile_pk: str | None = agent_pk
+        self.name: str = agent_name
+        self.background: dict[str, Any] | None = background
+        self.awake: bool = False
+
+    def set_profile(self, use_pk_value: bool) -> None:
+        if not use_pk_value:
+            assert (
+                self.background is not None and self.name is not None
+            ), "Background and name must be provided"
+            if " " in self.name:
+                first_name, last_name = self.name.split(" ", 1)
+            else:
+                first_name = self.name
+                last_name = ""
+            profile = AgentProfile(
+                first_name=first_name, last_name=last_name, **self.background
+            )
+            profile.save()
+        else:
+            profile = AgentProfile.get(pk=self.agent_profile_pk)
+
+        self.agent_profile_pk = profile.pk
+        self.name = " ".join([profile.first_name, profile.last_name]).strip()
+        self.background = profile.model_dump()
 
     def _format_message_history(self, message_history: list[Observation]) -> str:
         ## TODO: akhatua Fix the mapping of action to be gramatically correct
@@ -59,11 +88,23 @@ def _format_message_history(self, message_history: list[Observation]) -> str:
 
     async def aact(self, obs: Observation) -> AgentAction:
         if obs.turn_number == -1:
+            if self.awake:
+                return AgentAction(
+                    agent_name=self.name,
+                    output_channel=self.output_channel,
+                    action_type="none",
+                    argument="",
+                )
+            args = json.loads(obs.last_turn)
+            self.set_profile(args["use_pk_value"])
+            self.awake = True
             return AgentAction(
                 agent_name=self.name,
                 output_channel=self.output_channel,
                 action_type="none",
-                argument=self.model_name,
+                argument=json.dumps(
+                    {"pk": self.agent_profile_pk, "model_name": self.model_name}
+                ),
             )
 
         self.message_history.append(obs)
diff --git a/examples/experimental/sotopia_original_replica/origin.toml b/examples/experimental/sotopia_original_replica/origin.toml
index 7bf225273..562f22fe7 100644
--- a/examples/experimental/sotopia_original_replica/origin.toml
+++ b/examples/experimental/sotopia_original_replica/origin.toml
@@ -1,5 +1,5 @@
 redis_url = "redis://localhost:6379/0"
-extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator"]
+extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator","sotopia.experimental.agents.evaluators"]
 
 
 [[nodes]]
@@ -9,11 +9,13 @@ node_class = "moderator"
 [nodes.node_args]
 output_channels = ["moderator:Jane", "moderator:Jack"]
 input_channels = ["Jane:moderator", "Jack:moderator"]
-agent_backgrounds = {"Jane" = "", "Jack" = ""}
+evaluator_channels = [["evaluator:moderator","moderator:evaluator"]]
 agent_mapping = {"moderator:Jane" = "Jane", "moderator:Jack" = "Jack"}
 scenario = "Two friends are sitting in a cafe and catching up with each other's lives."
-max_turns = 2
+max_turns = 3
 push_to_db = false
+evaluate_episode = true
+use_pk_value = false
 
 [[nodes]]
 node_name = "Jack"
@@ -26,6 +28,8 @@ output_channel = "Jack:moderator"
 goal = "Your goal is to borrow 5000 dollars from Jane."
 model_name = "gpt-4o-mini"
 agent_name = "Jack"
+background = {"occupation" = "construction worker"}
+agent_pk = ""
 
 
 [[nodes]]
@@ -39,6 +43,8 @@ input_channels = ["moderator:Jane"]
 goal = "Your goal is to help Jack however, you are in a finicial crisis yourself and can only afford to give him 500 dollars."
model_name = "gpt-4o-mini" agent_name = "Jane" +background = {"occupation" = "gardener"} +agent_pk = "" [[nodes]] node_name = "chat_print" @@ -50,3 +56,12 @@ node_class = "chat_print" [nodes.node_args] env_agents = ["Jack", "Jane"] + +[[nodes]] +node_name = "evaluator" +node_class = "evaluator" + +[nodes.node_args] +input_channels = ["moderator:evaluator"] +output_channels = ["evaluator:moderator"] +model_name = "gpt-4o-mini" diff --git a/examples/fix_missing_episodes.py b/examples/fix_missing_episodes.py index 8f999d2af..8eb27f355 100644 --- a/examples/fix_missing_episodes.py +++ b/examples/fix_missing_episodes.py @@ -20,7 +20,7 @@ ) from sotopia.envs.evaluators import ( EvaluationForTwoAgents, - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, RuleBasedTerminatedEvaluator, SotopiaDimensions, ) @@ -229,7 +229,7 @@ def yield_env_agent_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator( + EpisodeLLMEvaluator( model_names["env"], EvaluationForTwoAgents[SotopiaDimensions], ), diff --git a/examples/fix_missing_episodes_with_tag.py b/examples/fix_missing_episodes_with_tag.py index 31b9ee8fd..312f5e648 100644 --- a/examples/fix_missing_episodes_with_tag.py +++ b/examples/fix_missing_episodes_with_tag.py @@ -36,7 +36,7 @@ ) from sotopia.envs.evaluators import ( EvaluationForTwoAgents, - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, RuleBasedTerminatedEvaluator, SotopiaDimensions, ) @@ -327,7 +327,7 @@ def yield_env_agent_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator( + EpisodeLLMEvaluator( model_names["env"], EvaluationForTwoAgents[SotopiaDimensions], ), diff --git a/examples/use_custom_dimensions.py b/examples/use_custom_dimensions.py index 48c7a195e..576e2dfd5 100644 --- a/examples/use_custom_dimensions.py +++ b/examples/use_custom_dimensions.py @@ -7,7 +7,7 @@ from typing import Type, Union from redis_om import Migrator from sotopia.envs.evaluators import ( - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, EvaluationForTwoAgents, RuleBasedTerminatedEvaluator, ) @@ -152,7 +152,7 @@ def run_simple_sample_with_custom_samples( custom_dimensions, list_name="custom" ) evaluator = RuleBasedTerminatedEvaluator(max_turn_number=10, max_stale_turn=2) - terminal_evaluator = ReachGoalLLMEvaluator( + terminal_evaluator = EpisodeLLMEvaluator( model_name="gpt-4o-mini", response_format_class=EvaluationForTwoAgents[custom_dimensions_type], # type: ignore ) diff --git a/sotopia-chat/chat_server.py b/sotopia-chat/chat_server.py index e6134e908..aa6a68042 100644 --- a/sotopia-chat/chat_server.py +++ b/sotopia-chat/chat_server.py @@ -21,7 +21,7 @@ EnvironmentProfile, ) from sotopia.envs.evaluators import ( - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, RuleBasedTerminatedEvaluator, ) from sotopia.envs.parallel import ParallelSotopiaEnv @@ -64,7 +64,7 @@ async def _start_server_with_two_session_ids_and_agent_env_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]), + EpisodeLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]), ], ) random.shuffle(session_ids) @@ -97,7 +97,7 @@ async def _start_server_with_one_session_id_and_agent_env_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]), + EpisodeLLMEvaluator("gpt-4", 
         ],
     )
diff --git a/sotopia/api/fastapi_server.py b/sotopia/api/fastapi_server.py
index f1a3220c6..7c6dcc02d 100644
--- a/sotopia/api/fastapi_server.py
+++ b/sotopia/api/fastapi_server.py
@@ -24,7 +24,7 @@
 from sotopia.envs.parallel import ParallelSotopiaEnv
 from sotopia.envs.evaluators import (
     RuleBasedTerminatedEvaluator,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     EvaluationForTwoAgents,
     SotopiaDimensions,
 )
@@ -267,7 +267,7 @@ async def nonstreaming_simulation(
             ),
         ],
         "terminal_evaluators": [
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 simulation_request.models[0],
                 EvaluationForTwoAgents[SotopiaDimensions],
             ),
diff --git a/sotopia/api/websocket_utils.py b/sotopia/api/websocket_utils.py
index 36cb20c5f..4463e542c 100644
--- a/sotopia/api/websocket_utils.py
+++ b/sotopia/api/websocket_utils.py
@@ -1,6 +1,6 @@
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
 )
 from sotopia.agents import Agents, LLMAgent
@@ -98,7 +98,7 @@ def get_env_agents(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 evaluator_model,
                 EvaluationForTwoAgents[evaluation_dimensions],  # type: ignore
             ),
diff --git a/sotopia/cli/benchmark/benchmark.py b/sotopia/cli/benchmark/benchmark.py
index 87da66e4e..4e9001812 100644
--- a/sotopia/cli/benchmark/benchmark.py
+++ b/sotopia/cli/benchmark/benchmark.py
@@ -26,7 +26,7 @@
 from sotopia.database.serialization import get_rewards_from_episode
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
@@ -363,7 +363,7 @@ def _list_all_env_agent_combo_not_in_db(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 model_names["env"],
                 EvaluationForTwoAgents[SotopiaDimensions],
             ),
diff --git a/sotopia/database/logs.py b/sotopia/database/logs.py
index e05618f4b..da66c7fcf 100644
--- a/sotopia/database/logs.py
+++ b/sotopia/database/logs.py
@@ -27,7 +27,7 @@ class BaseEpisodeLog(BaseModel):
     tag: str | None = Field(index=True, default="")
     models: list[str] | None = Field(index=True, default=[])
     messages: list[list[tuple[str, str, str]]]  # Messages arranged by turn
-    reasoning: str
+    reasoning: str = Field(default="")
     rewards: list[tuple[float, dict[str, float]] | float]  # Rewards arranged by turn
     rewards_prompt: str
diff --git a/sotopia/envs/evaluators.py b/sotopia/envs/evaluators.py
index 709d0bdaa..23af686d9 100644
--- a/sotopia/envs/evaluators.py
+++ b/sotopia/envs/evaluators.py
@@ -257,7 +257,7 @@ async def __acall__(
         return self(turn_number, messages)
 
 
-class ReachGoalLLMEvaluator(Evaluator, Generic[T_eval_dim]):
+class EpisodeLLMEvaluator(Evaluator, Generic[T_eval_dim]):
     @beartype
     def __init__(
         self,
diff --git a/sotopia/experimental/agents/evaluators.py b/sotopia/experimental/agents/evaluators.py
new file mode 100644
index 000000000..d6ceaedaa
--- /dev/null
+++ b/sotopia/experimental/agents/evaluators.py
@@ -0,0 +1,106 @@
+import json
+
+from aact import NodeFactory
+from .base_agent import BaseAgent
+from .logs import EpisodeLog
+from .datamodels import AgentAction, Observation
+from sotopia.database.persistent_profile import AgentProfile
+
+from typing import Generic, TypeVar, Type, Any
+from pydantic import BaseModel, Field
+
+from sotopia.envs.evaluators import GoalDimension
+from sotopia.generation_utils.generate import agenerate
+from langchain.output_parsers import PydanticOutputParser
+
+
+default_reward_prompt = """
+{history}
+Based on previous interactions, evaluate how well each of the participants achieve their goals.
+Here are the list of agents:
+{agent_list}
+Please output your evaluation following the format:
+{format_instructions}
+"""
+
+T_eval_dim = TypeVar("T_eval_dim", bound=BaseModel)
+
+
+class EvaluationForMutiAgents(BaseModel, Generic[T_eval_dim]):
+    agents_evaluation: dict[str, T_eval_dim] = Field(
+        description="the evaluation for each agent, the key is the agent name,be sure to include every agent in the agent list, the value should follow the evaluation dimension format"
+    )
+
+
+@NodeFactory.register("evaluator")
+class Evaluator(BaseAgent[Observation, AgentAction]):
+    def __init__(
+        self,
+        node_name: str,
+        model_name: str,
+        input_channels: list[str],
+        output_channels: list[str],
+        redis_url: str,
+        reward_prompt: str = default_reward_prompt,
+        eval_dim_class: str = "GoalDimension",
+        temperature: float = 0.0,
+    ):
+        super().__init__(
+            input_channel_types=[
+                (input_channel, Observation) for input_channel in input_channels
+            ],
+            output_channel_types=[
+                (output_channel, AgentAction) for output_channel in output_channels
+            ],
+            node_name=node_name,
+            redis_url=redis_url,
+        )
+        self.output_channels = output_channels
+        self.model_name = model_name
+        self.reward_prompt = reward_prompt
+        self.temperature = temperature
+        if eval_dim_class == "GoalDimension":
+            self.response_format_class: Type[BaseModel] = EvaluationForMutiAgents[
+                GoalDimension
+            ]
+        else:
+            raise ValueError(
+                f"the eval_dim_class : {eval_dim_class} is not implemented"
+            )
+        # TODO: need a registry for the evaluation dimension class, so dimension can be initialized with a str
+
+    async def aact(self, content: Observation) -> AgentAction:
+        epilog = EpisodeLog(**json.loads(content.last_turn))
+
+        result = await self.aevaluate(epilog)
+        return AgentAction(
+            agent_name="evaluator",
+            output_channel=f"evaluator:{content.agent_name}",
+            action_type="speak",
+            argument=json.dumps(
+                {"reward": json.dumps(result), "reward_prompt": self.reward_prompt}
+            ),
+        )
+
+    async def aevaluate(self, episode: EpisodeLog) -> Any:
+        # TODO: below is a temporary implementation, needs to be replaced by using render_for_humans in EpisodeLog
+        history = "\n".join(
+            f"{msg[0][0]} said: {msg[0][2]}" for msg in episode.messages
+        )
+        agent_list = []
+        for pk in episode.agents:
+            agent = AgentProfile.get(pk)
+            name = agent.first_name + " " + agent.last_name
+            name = name.strip()
+            agent_list.append(name)
+
+        res: BaseModel = await agenerate(
+            model_name=self.model_name,
+            template=self.reward_prompt,
+            input_values=dict(history=history, agent_list=str(agent_list)),
+            output_parser=PydanticOutputParser[self.response_format_class](  # type: ignore[name-defined]
+                pydantic_object=self.response_format_class
+            ),
+            temperature=self.temperature,
+        )
+        return res.model_dump()["agents_evaluation"]
diff --git a/sotopia/experimental/agents/logs.py b/sotopia/experimental/agents/logs.py
new file mode 100644
index 000000000..4702f46cc
--- /dev/null
+++ b/sotopia/experimental/agents/logs.py
@@ -0,0 +1,8 @@
+from redis_om import JsonModel
+from sotopia.database.logs import BaseEpisodeLog
+from sotopia.database.persistent_profile import AgentProfile
+
+
+class EpisodeLog(BaseEpisodeLog, JsonModel):
+    def render_for_humans(self) -> tuple[list[AgentProfile], list[str]]:
+        raise NotImplementedError
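
For reference, a minimal sketch (not part of this patch) of the payload round-trip implied by the new evaluator node: EvaluationForMutiAgents wraps one entry per agent, and Evaluator.aact double-encodes that result into AgentAction.argument. ScoreDimension below is a hypothetical stand-in for GoalDimension (whose real fields live in sotopia.envs.evaluators), and the agent names are illustrative:

    import json
    from typing import Generic, TypeVar
    from pydantic import BaseModel, Field

    T_eval_dim = TypeVar("T_eval_dim", bound=BaseModel)

    class ScoreDimension(BaseModel):  # hypothetical stand-in for GoalDimension
        goal: int = Field(description="0-10 score for how well the agent achieved its goal")

    class EvaluationForMutiAgents(BaseModel, Generic[T_eval_dim]):
        agents_evaluation: dict[str, T_eval_dim]

    # What Evaluator.aact packs into AgentAction.argument:
    result = {"Jack Smith": {"goal": 7}, "Jane Doe": {"goal": 9}}  # aevaluate() output
    argument = json.dumps({"reward": json.dumps(result), "reward_prompt": "..."})

    # A consumer decodes twice: once for the envelope, once for the reward payload.
    envelope = json.loads(argument)
    parsed = EvaluationForMutiAgents[ScoreDimension].model_validate(
        {"agents_evaluation": json.loads(envelope["reward"])}
    )
    print(parsed.agents_evaluation["Jane Doe"].goal)  # -> 9
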
diff --git a/sotopia/experimental/agents/moderator.py b/sotopia/experimental/agents/moderator.py
index ce57fb38b..5848a8480 100644
--- a/sotopia/experimental/agents/moderator.py
+++ b/sotopia/experimental/agents/moderator.py
@@ -1,22 +1,21 @@
 import asyncio
 import sys
-
+import json
 
 if sys.version_info < (3, 11):
     from typing_extensions import Self
 else:
     from typing import Self
 
-
 from aact import Message, NodeFactory, Node
 from aact.messages import DataModel, DataModelFactory
 
 from typing import Literal, Any, AsyncIterator
 from pydantic import Field
 
-from sotopia.database import EpisodeLog
 from .datamodels import AgentAction, Observation
 from sotopia.messages import ActionType
+from .logs import EpisodeLog
 
 
 @DataModelFactory.register("observations")
@@ -30,12 +29,13 @@ class Observations(DataModel):
 class Moderator(Node[AgentAction, Observation]):
     def __init__(
         self,
+        node_name: str,
         input_channels: list[str],
         output_channels: list[str],
         scenario: str,
         agent_mapping: dict[str, str],
-        node_name: str,
-        agent_backgrounds: dict[str, str],
+        evaluator_channels: list[list[str]] = [],
+        tag: str = "",
         redis_url: str = "redis://localhost:6379/0",
         action_order: Literal["simultaneous", "round-robin", "random"] = "round-robin",
         available_actions: list[ActionType] = [
@@ -47,11 +47,15 @@ def __init__(
         ],
         max_turns: int = 20,
         push_to_db: bool = False,
-    ):
+        use_pk_value: bool = False,
+        evaluate_episode: bool = False,
+    ) -> None:
+        print([(channel[0], AgentAction) for channel in evaluator_channels])
         super().__init__(
             input_channel_types=[
                 (input_channel, AgentAction) for input_channel in input_channels
-            ],
+            ]
+            + [(channel[0], AgentAction) for channel in evaluator_channels],
             output_channel_types=[
                 (output_channel, Observation) for output_channel in output_channels
             ],
@@ -62,6 +66,7 @@ def __init__(
         self.task_scheduler: asyncio.Task[None] | None = None
         self.shutdown_event: asyncio.Event = asyncio.Event()
         self.agent_mapping: dict[str, str] = agent_mapping
+        self.tag: str = tag
         self.action_order: Literal["simultaneous", "round-robin", "random"] = (
             action_order
         )
@@ -71,14 +76,20 @@
         self.current_agent_index: int = 0
         self.scenario: str = scenario
         self.agents: list[str] = list(agent_mapping.values())
+        self.agents_pk: dict[str, str] = {}
         self.agent_models: dict[str, str] = {}
         self.agents_awake: dict[str, bool] = {name: False for name in self.agents}
         self.all_agents_awake: asyncio.Event = asyncio.Event()
-        self.message_history: list[list[tuple[str, str, str]]] = [
-            [("Environment", "Environment", self.scenario)]
-        ]
-        self.push_to_db = push_to_db
-        self.agent_backgrounds = agent_backgrounds
+        self.evaluator_channels: list[list[str]] = evaluator_channels
+        self.push_to_db: bool = push_to_db
+        self.use_pk_value: bool = use_pk_value
+
+        self.evaluate_episode: bool = evaluate_episode
+        assert (not self.evaluate_episode) or len(
+            evaluator_channels
+        ) > 0, "if evaluate_episode is True, evaluator_channels should not be empty"
+
+        self.epilog: EpisodeLog  # will be initialized in booting process
 
         if self.action_order == "round-robin":
             pass
@@ -122,7 +133,7 @@ async def _task_scheduler(self) -> None:
         await self.all_agents_awake.wait()
         while not self.shutdown_event.is_set():
             observation = await self.observation_queue.get()
-            action_or_none = await self.aact(observation)
+            action_or_none = await self.astep(observation)
             if action_or_none is not None:
                 await self.send(action_or_none)
             self.observation_queue.task_done()
@@ -131,7 +142,7 @@ async def booting(self) -> None:
         """
         1. send checking message to agents for every 0.1 seconds, until all agents are awake
         - this message has turn_number of -1 for identification, agents should not record this into actual message_history
-        - if the agent booted succesfully, he is expected to return its model name for record.
+        - if the agent booted successfully, it is expected to return its agent_profile's pk for record.
         2. (under round-robin action order)after all agents are awake, send agent[0] a message to allow the agent to start speaking
         """
         while not self.all_agents_awake.is_set():
             await self.send(
                 Observations(
                     observations_map={
                         output_channel: Observation(
                             agent_name="moderator",
-                            last_turn=self.scenario,
+                            last_turn=json.dumps(
+                                {
+                                    "use_pk_value": self.use_pk_value,
+                                }
+                            ),
                             turn_number=-1,
                             available_actions=["none"],
                         )
@@ -148,21 +163,34 @@
                     }
                 )
             )
-            await asyncio.sleep(0.1)
+            await asyncio.sleep(0.2)
             while not self.observation_queue.empty():
                 agent_action = await self.observation_queue.get()
-                self.agents_awake[agent_action.agent_name] = True
-                self.agent_models[agent_action.agent_name] = agent_action.argument
+                if not self.agents_awake[agent_action.agent_name]:
+                    self.agents_awake[agent_action.agent_name] = True
+                    args: dict[str, Any] = json.loads(agent_action.argument)
+                    self.agents_pk[agent_action.agent_name] = args["pk"]
+                    self.agent_models[agent_action.agent_name] = args["model_name"]
             if False not in self.agents_awake.values():
                 self.all_agents_awake.set()
+                print("all agents are awake")
+                self.epilog = EpisodeLog(
+                    environment=self.scenario,
+                    agents=list(self.agents_pk.values()),
+                    tag=self.tag,
+                    models=list(self.agent_models.values()),
+                    messages=[[("Environment", "Environment", self.scenario)]],
+                    rewards=[0.0] * len(self.agents),
+                    rewards_prompt="",
+                )
 
         if self.action_order == "round-robin":
             await self.send(
                 Observations(
                     observations_map={
                         output_channel: Observation(
                             agent_name="moderator",
-                            last_turn=self.agent_backgrounds[agent_name],
+                            last_turn=self.scenario,
                             turn_number=0,
                             available_actions=self.available_actions
                             if agent_name == self.agents[0]
@@ -175,39 +203,67 @@
             self.current_agent_index += 1
 
     async def wrap_up_and_stop(self) -> None:
-        if self.push_to_db:
-            await self.save()
+        try:
+            await asyncio.sleep(0.1)
+            print("all agents have left, wrap up and stop")
+            self.shutdown_event.set()  # this will disable the task scheduler
+            if self.evaluate_episode:
+                epilog = await self.aeval(self.epilog)
+            if self.push_to_db:
+                epilog.save()
+        except Exception as e:
+            print(f"error in wrap_up_and_stop: {e}")
         await asyncio.sleep(0.5)
-        print("stopping all agents")
+        print("result of this episode:\n", self.epilog.model_dump_json())
         await self.r.publish(
-            f"shutdown:{self.node_name}",
+            "shutdown:moderator",
             "shutdown",
         )
 
-    async def save(self) -> EpisodeLog:
+    async def episode_log_to_messages(
+        self, epilog: EpisodeLog
+    ) -> list[tuple[str, str, str]]:
+        messages = []
+        for turn_number, turn in enumerate(epilog.messages):
+            for message in turn:
+                messages.append((message[0], message[1], message[2]))
+        return messages
+
+    async def aeval(self, epilog: EpisodeLog) -> EpisodeLog:
         """
-        save the EpisodeLog to redis, without evaluating
-        TODO: specify what to be added inside tag
-        TODO: update the code so that EpisodeLog.render_for_humans() can work
-            -currently it cannot work because no AgentProfile has been uploaded to redis
-            -such a process should be done back in the agents' end
-            -also the current agentslist is consist of names, but not uuid's of agents
+        evaluate the episode
+        will send the epilog to evaluators, and wait for the evaluation to be finished
         """
-        epilog = EpisodeLog(
-            environment=self.scenario,
-            agents=self.agents,
-            tag=None,
-            models=list(self.agent_models.values()),
-            messages=self.message_history,
-            reasoning="",
-            rewards=[0] * len(self.agents),
-            rewards_prompt="",
-        )
-        epilog.save()
-        # print(epilog.render_for_humans())
+        assert len(self.evaluator_channels) == 1, "currently only support one evaluator"
+
+        for evaluator_channel in self.evaluator_channels:
+            print(evaluator_channel[1])
+            await self.r.publish(
+                evaluator_channel[1],
+                Message[Observation](
+                    data=Observation(
+                        agent_name="moderator",
+                        last_turn=epilog.model_dump_json(),
+                        turn_number=self.turn_number,
+                        available_actions=self.available_actions,
+                    )
+                ).model_dump_json(),
+            )
+
+        print("episode eval started")
+
+        for _ in range(
+            len(self.evaluator_channels)
+        ):  # the queue will take in input and output from this channel
+            raw_res = await self.observation_queue.get()
+            res = json.loads(raw_res.argument)
+            epilog.rewards = res["reward"]
+            epilog.rewards_prompt = res["reward_prompt"]
+
+        print("episode eval finished")
         return epilog
 
-    async def aact(self, agent_action: AgentAction) -> Observations | None:
+    async def astep(self, agent_action: AgentAction) -> Observations | None:
         if agent_action.action_type == "leave":
             self.agents_awake[agent_action.agent_name] = False
             if True not in self.agents_awake.values():
@@ -216,24 +272,16 @@ async def aact(self, agent_action: AgentAction) -> Observations | None:
         if agent_action.action_type == "none":
             return None
 
-        if len(self.message_history) == 1:
-            self.message_history[0].append(
+        # message (sender, receivers (separated by comma), message content)
+        self.epilog.messages.append(
+            [
                 (
                     agent_action.agent_name,
                     "Environment",
                     agent_action.to_natural_language(),
                 )
-            )
-        else:
-            self.message_history.append(
-                [
-                    (
-                        agent_action.agent_name,
-                        "Environment",
-                        agent_action.to_natural_language(),
-                    )
-                ]
-            )
+            ]
+        )
 
         if self.turn_number < self.max_turns:
             self.turn_number += 1
@@ -266,5 +314,4 @@ async def aact(self, agent_action: AgentAction) -> Observations | None:
             )
             observations_map[output_channel] = observation
         self.current_agent_index = (self.current_agent_index + 1) % len(self.agents)
-
         return Observations(observations_map=observations_map)
diff --git a/sotopia/server.py b/sotopia/server.py
index ba88e9a7b..d838d6007 100644
--- a/sotopia/server.py
+++ b/sotopia/server.py
@@ -19,7 +19,7 @@
 from sotopia.envs import ParallelSotopiaEnv
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
     unweighted_aggregate_evaluate,
@@ -309,7 +309,7 @@ def get_agent_class(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         "terminal_evaluators": [
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 model_dict["env"],
                 EvaluationForTwoAgents[SotopiaDimensions],
             ),
@@ -389,7 +389,7 @@ async def arun_one_script(
         env_message = [("Environment", script_background)]
         agent_messages = env_message + agent_messages
 
-    evaluator = ReachGoalLLMEvaluator(
+    evaluator = EpisodeLLMEvaluator(
         model_name="gpt-4",
         response_format_class=EvaluationForTwoAgents[SotopiaDimensions],
     )
@@ -460,7 +460,7 @@ async def aevaluate_one_episode(
     history = episode.rewards_prompt.replace("Prompt after formatting:", "").split(
         ",\nBased on previous interactions"
    )[0]
-    evaluator = ReachGoalLLMEvaluator(
+    evaluator = EpisodeLLMEvaluator(
         model_name=model,
         response_format_class=EvaluationForTwoAgents[SotopiaDimensions],
     )
diff --git a/tests/api/test_fastapi.py b/tests/api/test_fastapi.py
index 0a4f6c3c9..97b569d17 100644
--- a/tests/api/test_fastapi.py
+++ b/tests/api/test_fastapi.py
@@ -370,48 +370,3 @@ def test_websocket_simulate(create_mock_data: Callable[[], None]) -> None:
             "type": "FINISH_SIM",
         }
         websocket.send_json(end_msg)
-
-
-# def test_simulate(create_mock_data: Callable[[], None]) -> None:
-#     response = client.post(
-#         "/simulate",
-#         json={
-#             "env_id": "tmppk_env_profile",
-#             "agent_ids": ["tmppk_agent1", "tmppk_agent2"],
-#             "models": [
-#                 # "custom/llama3.2:1b@http://localhost:8000/v1",
-#                 # "custom/llama3.2:1b@http://localhost:8000/v1",
-#                 # "custom/llama3.2:1b@http://localhost:8000/v1"
-#                 "gpt-4o-mini",
-#                 "gpt-4o-mini",
-#                 "gpt-4o-mini",
-#             ],
-#             "max_turns": 2,
-#             "tag": "test_tag",
-#         },
-#     )
-#     assert response.status_code == 200
-#     assert isinstance(response.json(), str)
-#     max_retries = 20
-#     retry_count = 0
-#     while retry_count < max_retries:
-#         try:
-#             status = NonStreamingSimulationStatus.find(
-#                 NonStreamingSimulationStatus.episode_pk == response.json()
-#             ).all()[0]
-#             assert isinstance(status, NonStreamingSimulationStatus)
-#             print(status)
-#             if status.status == "Error":
-#                 raise Exception("Error running simulation")
-#             elif status.status == "Completed":
-#                 # EpisodeLog.get(response.json())
-#                 break
-#             # Status is "Started", keep polling
-#             time.sleep(1)
-#             retry_count += 1
-#         except Exception as e:
-#             print(f"Error checking simulation status: {e}")
-#             time.sleep(1)
-#             retry_count += 1
-#     else:
-#         raise TimeoutError("Simulation timed out after 10 retries")
diff --git a/tests/envs/test_evaluators.py b/tests/envs/test_evaluators.py
index a5285ea76..ee2381fb4 100644
--- a/tests/envs/test_evaluators.py
+++ b/tests/envs/test_evaluators.py
@@ -4,7 +4,7 @@
 
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     unweighted_aggregate_evaluate,
 )
@@ -134,7 +134,7 @@ class _ReachGoal(BaseModel):
 
 @pytest.mark.asyncio
 async def test_reach_goal_llm_evaluator_async() -> None:
-    evaluator = ReachGoalLLMEvaluator(
+    evaluator = EpisodeLLMEvaluator(
         "custom/structured@http://localhost:8000/v1",
         response_format_class=EvaluationForTwoAgents[_ReachGoal],
     )
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index 23f97c8d1..6aa86c6cc 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -23,7 +23,7 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
@@ -143,7 +143,7 @@ def compose_env_agent_combo(
         model_name="gpt-4o-mini",
         evaluators=[RuleBasedTerminatedEvaluator(max_turn_number=1, max_stale_turn=2)],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 "gpt-4o-mini",
                 EvaluationForTwoAgents[SotopiaDimensions],
             )
diff --git a/uv.lock b/uv.lock
index d2e999d49..025afafa7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3258,7 +3258,7 @@ wheels = [
 
 [[package]]
 name = "sotopia"
-version = "0.1.2"
+version = "0.1.3"
 source = { editable = "." }
 dependencies = [
     { name = "aact" },
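
Downstream of the ReachGoalLLMEvaluator -> EpisodeLLMEvaluator rename, call sites change only in the class name; a minimal sketch of the pattern used throughout the updated examples and servers above (the model name is a placeholder):

    from sotopia.envs.evaluators import (
        EpisodeLLMEvaluator,
        EvaluationForTwoAgents,
        RuleBasedTerminatedEvaluator,
        SotopiaDimensions,
    )

    # ReachGoalLLMEvaluator(...) becomes EpisodeLLMEvaluator(...); the arguments are unchanged.
    evaluators = [RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2)]
    terminal_evaluators = [
        EpisodeLLMEvaluator(
            "gpt-4o-mini",
            EvaluationForTwoAgents[SotopiaDimensions],
        )
    ]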