sotopia-lab · XuhuiZhou · Dec 2, 2024
diff --git a/examples/generate_characters.py b/examples/generate_characters.py
@@ -0,0 +1,15 @@
+"""Example: generate a structured character profile from a free-text description."""
+
+import asyncio
+import rich
+from sotopia.generation_utils.generate import agenerate_character_profile
+
+if __name__ == "__main__":
+    # agenerate_character_profile is declared with `async def`, so run it to completion with asyncio.run.
+    agent_profile = asyncio.run(
+        agenerate_character_profile(
+            model_name="gpt-4o",
+            basic_info="She likes certain brands for her clothes and shoes, and rarely purchases items from other brands she is not familiar with. However, she likes to browse the shopping website from time to time, and only purchases some items when they are on sale. She is an extraverted person and asks a lot of questions in r/AskReddit regarding her own life or thoughts. She sometimes creates new forums about a new topic that she is particularly interested in and welcomes people joining. She has a Jeep Wrangler and likes to travel around, especially off-road. She often does road trips during vacations, and likes to plan for optimal travel routes in terms of time and distance.",
+        )
+    )
+    rich.print(agent_profile)
diff --git a/sotopia/agents/__init__.py b/sotopia/agents/__init__.py
@@ -1,6 +1,5 @@
 from .base_agent import BaseAgent
 from .generate_agent_background import (
-    generate_background,
     generate_background_conversation,
 )
 from .llm_agent import (
@@ -16,7 +15,6 @@
     "LLMAgent",
     "Agents",
     "HumanAgent",
-    "generate_background",
     "generate_background_conversation",
     "RedisAgent",
     "ScriptWritingAgent",

diff --git a/sotopia/agents/generate_agent_background.py b/sotopia/agents/generate_agent_background.py
@@ -1,43 +1,9 @@
 import json
-import os
 from typing import Callable
 
-from sotopia.generation_utils.generate import convert_narratives, agenerate_init_profile
 from sotopia.messages import Message, ScriptBackground
 
 
-async def generate_background(
-    info_json_file: str, basic_info: dict[str, str]
-) -> tuple[str, str, str, str, list[dict[str, str]]]:
-    if os.path.isfile(info_json_file):
-        with open(info_json_file, "r") as f:
-            info_dict = json.load(f)
-            initial_profile = str(info_dict["initial_profile"])
-            profile = info_dict["profile"]
-            first_narrative = info_dict["first_narrative_profile"]
-            second_narrative = info_dict["second_narrative_profile"]
-            previous_messages = info_dict["messages"]
-    else:
-        initial_profile = str(basic_info)
-        profile = await agenerate_init_profile(
-            model_name="gpt-4o-mini", basic_info=basic_info
-        )
-        first_narrative = convert_narratives(
-            model_name="gpt-4o-mini", narrative="first", text=profile
-        )
-        second_narrative = convert_narratives(
-            model_name="gpt-4o-mini", narrative="second", text=profile
-        )
-        previous_messages = []
-    return (
-        initial_profile,
-        profile,
-        first_narrative,
-        second_narrative,
-        previous_messages,
-    )
-
-
 def generate_background_conversation(
     seed: dict[str, str],
     basic_info: dict[str, str],

diff --git a/sotopia/database/persistent_profile.py b/sotopia/database/persistent_profile.py
@@ -21,21 +21,169 @@ class RelationshipType(IntEnum):
 
 
 class AgentProfile(JsonModel):
-    first_name: str = Field(index=True)
-    last_name: str = Field(index=True)
-    age: int = Field(index=True, default_factory=lambda: 0)
-    occupation: str = Field(index=True, default_factory=lambda: "")
-    gender: str = Field(index=True, default_factory=lambda: "")
-    gender_pronoun: str = Field(index=True, default_factory=lambda: "")
-    public_info: str = Field(index=True, default_factory=lambda: "")
-    big_five: str = Field(index=True, default_factory=lambda: "")
-    moral_values: list[str] = Field(index=False, default_factory=lambda: [])
-    schwartz_personal_values: list[str] = Field(index=False, default_factory=lambda: [])
-    personality_and_values: str = Field(index=True, default_factory=lambda: "")
-    decision_making_style: str = Field(index=True, default_factory=lambda: "")
-    secret: str = Field(default_factory=lambda: "")
-    model_id: str = Field(default_factory=lambda: "")
-    mbti: str = Field(default_factory=lambda: "")
+    first_name: str = Field(index=True, description="The first name of the character")
+    last_name: str = Field(index=True, description="The last name of the character")
+    age: int = Field(
+        index=True, default_factory=lambda: 0, description="The age of the character"
+    )
+    occupation: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="The occupation of the character",
+    )
+    gender: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="The gender of the character",
+    )
+    gender_pronoun: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="The gender pronoun of the character",
+    )
+    public_info: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="Public information about the character",
+    )
+    big_five: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="The Big Five personality traits of the character",
+    )
+    moral_values: list[str] = Field(
+        index=False,
+        default_factory=lambda: [],
+        description="The moral values of the character, use the categories from Moral Foundations Theory (https://en.wikipedia.org/wiki/Moral_foundations_theory)",
+    )
+    schwartz_personal_values: list[str] = Field(
+        index=False,
+        default_factory=lambda: [],
+        description="The Schwartz personal values of the character, use the categories from Schwartz's Theory of Basic Values (https://scholarworks.gvsu.edu/cgi/viewcontent.cgi?article=1116&context=orpc)",
+    )
+    decision_making_style: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="The decision-making style of the character (categories from The Development and Validation of the Rational and Intuitive Decision Styles Scale)",
+    )
+    personality_and_values: str = Field(
+        index=True,
+        default_factory=lambda: "",
+        description="A summary of the character's personality and values",
+    )
+    secret: str = Field(
+        default_factory=lambda: "", description="Secrets about the character"
+    )
+    model_id: str = Field(default_factory=lambda: "", description="?")  # TODO(review): "?" is a placeholder description; replace it with the field's real meaning
+    mbti: str = Field(
+        default_factory=lambda: "",
+        description="The MBTI personality type of the character",
+    )
+    # Newly added fields for merging with the Stanford 1000 agent profiles (https://github.com/joonspk-research/genagents)
+    ethnicity: str = Field(
+        default_factory=lambda: "",
+        description="The ethnicity of the character, for example, 'American', 'Portuguese', 'Chinese', etc.",
+    )
+    race: str = Field(
+        default_factory=lambda: "", description="The race of the character"
+    )
+    detailed_race: str = Field(
+        default_factory=lambda: "",
+        description="The detailed race of the character",
+    )
+    hispanic_origin: str = Field(
+        default_factory=lambda: "",
+        description="The hispanic origin of the character, e.g., 'hispanic', 'not hispanic'",
+    )
+    street_address: str = Field(
+        default_factory=lambda: "",
+        description="The street address of the character, e.g., '123 Elmwood Drive'",
+    )
+    city: str = Field(
+        default_factory=lambda: "",
+        description="The city where the character resides, e.g., 'Little Rock'",
+    )
+    state: str = Field(
+        default_factory=lambda: "",
+        description="The state where the character resides, e.g., 'AR'",
+    )
+    political_views: str = Field(
+        default_factory=lambda: "",
+        description="The political views of the character, e.g., 'Slightly liberal'",
+    )
+    party_identification: str = Field(
+        default_factory=lambda: "",
+        description="The party identification of the character, e.g., 'Independent, close to democrat'",
+    )
+    residence_at_16: str = Field(
+        default_factory=lambda: "",
+        description="The region where the character resided at age 16, e.g., 'West South Central'",
+    )
+    same_residence_since_16: str = Field(
+        default_factory=lambda: "",
+        description="Whether the character has lived in the same state since age 16, e.g., 'Different state'",
+    )
+    family_structure_at_16: str = Field(
+        default_factory=lambda: "",
+        description="The family structure of the character at age 16, e.g., 'Lived with parents'",
+    )
+    family_income_at_16: str = Field(
+        default_factory=lambda: "",
+        description="The family income of the character at age 16, e.g., 'Average'",
+    )
+    fathers_highest_degree: str = Field(
+        default_factory=lambda: "",
+        description="The highest degree obtained by the character's father, e.g., 'High school'",
+    )
+    mothers_highest_degree: str = Field(
+        default_factory=lambda: "",
+        description="The highest degree obtained by the character's mother, e.g., 'High school'",
+    )
+    mothers_work_history: str = Field(
+        default_factory=lambda: "",
+        description="The work history of the character's mother, e.g., 'No'",
+    )
+    marital_status: str = Field(
+        default_factory=lambda: "",
+        description="The marital status of the character, e.g., 'Never married'",
+    )
+    work_status: str = Field(
+        default_factory=lambda: "",
+        description="The work status of the character, e.g., 'With a job, but not at work because of temporary illness, vacation, strike'",
+    )
+    military_service_duration: str = Field(
+        default_factory=lambda: "",
+        description="The duration of military service of the character, e.g., 'No active duty'",
+    )
+    religion: str = Field(
+        default_factory=lambda: "",
+        description="The religion of the character, e.g., 'Catholic'",
+    )
+    religion_at_16: str = Field(
+        default_factory=lambda: "",
+        description="The religion of the character at age 16, e.g., 'Catholic'",
+    )
+    born_in_us: str = Field(
+        default_factory=lambda: "",
+        description="Whether the character was born in the US, e.g., 'Yes'",
+    )
+    us_citizenship_status: str = Field(
+        default_factory=lambda: "",
+        description="The US citizenship status of the character, e.g., 'A U.S. citizen'",
+    )
+    highest_degree_received: str = Field(
+        default_factory=lambda: "",
+        description="The highest degree received by the character, e.g., 'High school'",
+    )
+    speak_other_language: str = Field(
+        default_factory=lambda: "",
+        description="Whether the character speaks another language, e.g., 'No'",
+    )
+    total_wealth: str = Field(
+        default_factory=lambda: "",
+        description="The total wealth of the character, e.g., 'Less than $5,000'. Make sure to include a certain value with US dollars as the unit.",
+    )
+
     tag: str = Field(
         index=True,
         default_factory=lambda: "",

diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
@@ -24,7 +24,7 @@
 from rich import print
 from typing_extensions import Literal
 
-from sotopia.database import EnvironmentProfile, RelationshipProfile
+from sotopia.database import EnvironmentProfile, RelationshipProfile, AgentProfile
 from sotopia.messages import ActionType, AgentAction, ScriptBackground
 from sotopia.messages.message_classes import (
     ScriptInteraction,
@@ -772,45 +772,38 @@ def process_history(
 
 
 @beartype
-async def agenerate_init_profile(
+async def agenerate_character_profile(
     model_name: str,
-    basic_info: dict[str, str],
+    basic_info: str,
     bad_output_process_model: str | None = None,
     use_fixed_model_version: bool = True,
-) -> str:
+) -> AgentProfile:
     """
-    Using langchain to generate the background
+    Generate a structured character profile from a free-text description.
+
+    `basic_info` is substituted into a prompt that tells the model to fill in
+    any missing details, and the reply is parsed into an `AgentProfile`.
+
+    Args:
+        model_name: Name of the model, forwarded to `agenerate`.
+        basic_info: Free-text description of the character.
+        bad_output_process_model: Forwarded unchanged to `agenerate`
+            (presumably the model used to repair unparsable output; confirm).
+        use_fixed_model_version: Forwarded unchanged to `agenerate`.
+
+    Returns:
+        The `AgentProfile` parsed from the model output.
+
+    NOTE(review): the template has no `{format_instructions}` placeholder;
+    confirm that `agenerate` still shows the `AgentProfile` schema to the model.
     """
     return await agenerate(
         model_name=model_name,
-        template="""Please expand a fictional background for {name}. Here is the basic information:
-            {name}'s age: {age}
-            {name}'s gender identity: {gender_identity}
-            {name}'s pronouns: {pronoun}
-            {name}'s occupation: {occupation}
-            {name}'s big 5 personality traits: {bigfive}
-            {name}'s moral Foundation: think {mft} is more important than others
-            {name}'s Schwartz portrait value: {schwartz}
-            {name}'s decision-making style: {decision_style}
-            {name}'s secret: {secret}
-            Include the previous information in the background.
-            Then expand the personal backgrounds with concrete details (e.g, look, family, hobbies, friends and etc.)
-            For the personality and values (e.g., MBTI, moral foundation, and etc.),
-            remember to use examples and behaviors in the person's life to demonstrate it.
+        template="""Generate a realistic character profile using the provided information. If any details are missing, make educated guesses to fill them in. DO NOT leave any entry blank.
+            {basic_info}
             """,
-        input_values=dict(
-            name=basic_info["name"],
-            age=basic_info["age"],
-            gender_identity=basic_info["gender_identity"],
-            pronoun=basic_info["pronoun"],
-            occupation=basic_info["occupation"],
-            bigfive=basic_info["Big_Five_Personality"],
-            mft=basic_info["Moral_Foundation"],
-            schwartz=basic_info["Schwartz_Portrait_Value"],
-            decision_style=basic_info["Decision_making_Style"],
-            secret=basic_info["secret"],
-        ),
-        output_parser=StrOutputParser(),
+        input_values=dict(basic_info=basic_info),
+        output_parser=PydanticOutputParser(pydantic_object=AgentProfile),
         bad_output_process_model=bad_output_process_model,
         use_fixed_model_version=use_fixed_model_version,
     )