Merge branch 'sotopia-lab:demo' into demo

sotopia-lab · Jan 7, 2025 · 038ffa3 · 038ffa3
2 parents a3abfee + 6b2db2a
commit 038ffa3
Show file tree

Hide file tree

Showing 26 changed files with 183 additions and 154 deletions.
diff --git a/.github/.codecov.yml b/.github/.codecov.yml
@@ -6,6 +6,7 @@ ignore:
   - ".github" # ignore the .github directory
   - "docs" # ignore the docs directory
   - "figs" # ignore the figs directory
+  - "ui" # ignore the ui directory
 
 coverage:
   status:

diff --git a/examples/use_custom_dimensions.py b/examples/use_custom_dimensions.py
@@ -192,24 +192,7 @@ def run_simple_sample_with_custom_samples(
 
 
 if __name__ == "__main__":
-    """
-        A sample dimension:
-            custom_dimensions: list[dict[str, Union[str, int]]] = [
-                {
-                    "name": "transactivity",
-                    "description": "Analyze the provided social interaction episode between the given pair/team, focusing on identifying instances of transactive exchanges. Evaluate the level of transactivity by considering the following aspects: elaboration, building upon ideas, questioning, argumentation. Analyze whether these transactive patterns persist consistently across the entire interaction or if there are notable variations throughout the exchange. In the 'reasoning' field, provide a comprehensive account of the logic and thought process that led to your conclusion. Consider how the observed instances of transactivity contribute to or detract from the overall quality and depth of the interaction. In the 'score' field, provide an integer score ranging from 0 to 10, where a higher score indicates a higher level of transactivity.",
-                    "range_high": 10,
-                    "range_low": 0,
-                },
-                {
-                    "name": "verbal_equity",
-                    "description": "Analyze the script and measure the level of verbal equity reflected in the interaction between the agents. And then analyze the extent to which the interaction shows a balanced distribution of speaking opportunities among team members. In the 'reasoning' field, provide a comprehensive account of the logic or thought process that led you to your conclusion. Further, provide an integer score ranging from 0 and 10 in the 'score' field. A higher score indicates a higher level of verbal equity.",
-                    "range_high": 10,
-                    "range_low": 0,
-                },
-            ]
-    """
-
+    # Sample custom dimensions:
     custom_dimensions: list[dict[str, Union[str, int]]] = [
         {
             "name": "transactivity",

diff --git a/sotopia/api/fastapi_server.py b/sotopia/api/fastapi_server.py
@@ -392,6 +392,7 @@ async def get_evaluation_dimensions() -> dict[str, list[CustomEvaluationDimensio
             CustomEvaluationDimension.get(pk=pk)
             for pk in custom_evaluation_dimension_list.dimension_pks
         ]
+    print(custom_evaluation_dimensions)
     return custom_evaluation_dimensions
 
 
@@ -436,31 +437,31 @@ def setup_routes(self) -> None:
             response_model=dict[str, list[CustomEvaluationDimension]],
         )(get_evaluation_dimensions)
 
-        @self.post("/scenarios/", response_model=str)
+        @self.post("/scenarios", response_model=str)
         async def create_scenario(scenario: BaseEnvironmentProfile) -> str:
             scenario_profile = EnvironmentProfile(**scenario.model_dump())
             scenario_profile.save()
             pk = scenario_profile.pk
             assert pk is not None
             return pk
 
-        @self.post("/agents/", response_model=str)
+        @self.post("/agents", response_model=str)
         async def create_agent(agent: BaseAgentProfile) -> str:
             agent_profile = AgentProfile(**agent.model_dump())
             agent_profile.save()
             pk = agent_profile.pk
             assert pk is not None
             return pk
 
-        @self.post("/relationship/", response_model=str)
+        @self.post("/relationship", response_model=str)
         async def create_relationship(relationship: BaseRelationshipProfile) -> str:
             relationship_profile = RelationshipProfile(**relationship.model_dump())
             relationship_profile.save()
             pk = relationship_profile.pk
             assert pk is not None
             return pk
 
-        @self.post("/evaluation_dimensions/", response_model=str)
+        @self.post("/evaluation_dimensions", response_model=str)
         async def create_evaluation_dimensions(
             evaluation_dimensions: CustomEvaluationDimensionsWrapper,
         ) -> str:
@@ -504,7 +505,7 @@ async def create_evaluation_dimensions(
             assert pk is not None
             return pk
 
-        @self.post("/simulate/", response_model=str)
+        @self.post("/simulate", response_model=str)
         def simulate(simulation_request: SimulationRequest) -> Response:
             try:
                 _: EnvironmentProfile = EnvironmentProfile.get(
@@ -601,6 +602,16 @@ async def delete_episode(episode_id: str) -> str:
             EpisodeLog.delete(episode_id)
             return episode_id
 
+        @self.delete(
+            "/evaluation_dimensions/{evaluation_dimension_list_name}",
+            response_model=str,
+        )
+        async def delete_evaluation_dimension_list(
+            evaluation_dimension_list_name: str,
+        ) -> str:
+            CustomEvaluationDimensionList.delete(evaluation_dimension_list_name)
+            return evaluation_dimension_list_name
+
         @self.websocket("/ws/simulation")
         async def websocket_endpoint(websocket: WebSocket, token: str) -> None:
             manager = SimulationManager()

diff --git a/sotopia/database/evaluation_dimensions.py b/sotopia/database/evaluation_dimensions.py
@@ -1,6 +1,6 @@
 from redis_om import JsonModel
 from redis_om.model.model import Field
-from pydantic import create_model, BaseModel
+from pydantic import create_model, BaseModel, AfterValidator
 from typing import Type, Callable, Tuple, Annotated, Union, cast, Any
 
 
@@ -56,7 +56,8 @@ def build_dimension_model(dimension_ids: list[str]) -> Type[BaseModel]:
             range_validator = EvaluationDimensionBuilder.create_range_validator(
                 dimension.range_low, dimension.range_high
             )
-            field_type = Annotated[Tuple[str, int], range_validator]
+            # Need to use AfterValidator to ensure validation happens after type checking
+            field_type = Annotated[Tuple[str, int], AfterValidator(range_validator)]
 
             fields[dimension.name] = (
                 field_type,
@@ -84,7 +85,8 @@ def build_dimension_model_from_dict(
             range_validator = EvaluationDimensionBuilder.create_range_validator(
                 dimension.range_low, dimension.range_high
             )
-            field_type = Annotated[Tuple[str, int], range_validator]
+            # Need to use AfterValidator to ensure validation happens after type checking
+            field_type = Annotated[Tuple[str, int], AfterValidator(range_validator)]
 
             fields[dimension.name] = (
                 field_type,
@@ -118,7 +120,8 @@ def select_existing_dimension_model_by_name(
             range_validator = EvaluationDimensionBuilder.create_range_validator(
                 dimension.range_low, dimension.range_high
             )
-            field_type = Annotated[Tuple[str, int], range_validator]
+            # Need to use AfterValidator to ensure validation happens after type checking
+            field_type = Annotated[Tuple[str, int], AfterValidator(range_validator)]
 
             fields[dimension.name] = (
                 field_type,

diff --git a/tests/api/test_fastapi.py b/tests/api/test_fastapi.py
@@ -251,7 +251,7 @@ def test_get_relationship(create_mock_data: Callable[[], None]) -> None:
 
 
 def test_get_evaluation_dimensions(create_mock_data: Callable[[], None]) -> None:
-    response = client.get("/evaluation_dimensions/")
+    response = client.get("/evaluation_dimensions")
     assert response.status_code == 200
     assert isinstance(response.json(), dict)
     assert response.json()["test_dimension_list"][0]["name"] == "test_dimension"
@@ -264,7 +264,7 @@ def test_create_agent(create_mock_data: Callable[[], None]) -> None:
         "first_name": "test_first_name",
         "last_name": "test_last_name",
     }
-    response = client.post("/agents/", json=agent_data)
+    response = client.post("/agents", json=agent_data)
     assert response.status_code == 200
     assert isinstance(response.json(), str)
 
@@ -277,7 +277,7 @@ def test_create_scenario(create_mock_data: Callable[[], None]) -> None:
         "scenario": "test_scenario",
         "tag": "test",
     }
-    response = client.post("/scenarios/", json=scenario_data)
+    response = client.post("/scenarios", json=scenario_data)
     assert response.status_code == 200
     assert isinstance(response.json(), str)
 

diff --git a/tests/database/test_evaluation_dimensions.py b/tests/database/test_evaluation_dimensions.py
@@ -0,0 +1,113 @@
+import pytest
+from pydantic import ValidationError
+from sotopia.database.evaluation_dimensions import (
+    EvaluationDimensionBuilder,
+    CustomEvaluationDimension,
+    CustomEvaluationDimensionList,
+)
+from typing import Generator, Callable
+
+
+def test_create_range_validator() -> None:
+    validator = EvaluationDimensionBuilder.create_range_validator(0, 10)
+    assert validator(("test", 5)) == ("test", 5)
+
+    with pytest.raises(ValueError):
+        validator(("test", 11))
+
+    with pytest.raises(ValueError):
+        validator(("test", -1))
+
+
+@pytest.fixture
+def test_dimension() -> Generator[None, None, None]:
+    dimension = CustomEvaluationDimension(
+        pk="tmppk_test_dimension",
+        name="test_dimension",
+        description="A test dimension",
+        range_high=10,
+        range_low=0,
+    )
+    dimension.save()
+    yield
+    dimension.delete(dimension.pk)
+
+
+@pytest.fixture
+def test_dimension_list() -> Generator[None, None, None]:
+    dimension = CustomEvaluationDimension(
+        pk="tmppk_test_dimension",
+        name="test_dimension",
+        description="A test dimension",
+        range_high=10,
+        range_low=0,
+    )
+    dimension.save()
+    dimension_list = CustomEvaluationDimensionList(
+        pk="tmppk_test_dimension_list",
+        name="test_list",
+        dimension_pks=["tmppk_test_dimension"],
+    )
+    dimension_list.save()
+    yield
+    dimension.delete("tmppk_test_dimension")
+    dimension_list.delete("tmppk_test_dimension_list")
+
+
+def test_build_dimension_model(test_dimension: Callable[[], None]) -> None:
+    # Test building model from dimension id
+    model = EvaluationDimensionBuilder.build_dimension_model(["tmppk_test_dimension"])
+    instance = model(test_dimension=("example", 5))
+    assert instance.dict()["test_dimension"] == ("example", 5)
+    # Test validation errors for out of range values
+    with pytest.raises(ValidationError):
+        model(test_dimension=("example", 11))
+    with pytest.raises(ValidationError):
+        model(test_dimension=("example", -1))
+
+
+def test_build_dimension_model_from_dict() -> None:
+    # Test building model from dictionary
+    dimensions: list[dict[str, str | int]] = [
+        {
+            "name": "test_dim",
+            "description": "A test dimension",
+            "range_high": 10,
+            "range_low": 0,
+        }
+    ]
+    model = EvaluationDimensionBuilder.build_dimension_model_from_dict(dimensions)
+
+    instance = model(test_dim=("example", 5))
+    assert instance.dict()["test_dim"] == ("example", 5)
+
+    with pytest.raises(ValidationError):
+        model(test_dim=("example", 11))
+
+
+def test_select_existing_dimension_model_by_name(
+    test_dimension: Callable[[], None],
+) -> None:
+    # Test building model from dimension names
+    model = EvaluationDimensionBuilder.select_existing_dimension_model_by_name(
+        ["test_dimension"]
+    )
+    instance = model(test_dimension=("example", 5))
+    assert instance.dict()["test_dimension"] == ("example", 5)
+
+    with pytest.raises(ValidationError):
+        model(test_dimension=("example", 11))
+
+
+def test_select_existing_dimension_model_by_list_name(
+    test_dimension_list: Callable[[], None],
+) -> None:
+    # Test building model from list name
+    model = EvaluationDimensionBuilder.select_existing_dimension_model_by_list_name(
+        "test_list"
+    )
+    instance = model(test_dimension=("example", 5))
+    assert instance.dict()["test_dimension"] == ("example", 5)
+
+    with pytest.raises(ValidationError):
+        model(test_dimension=("example", 11))
diff --git a/ui/streamlit_ui/.streamlit/config.toml → ui/.streamlit/config.toml b/ui/streamlit_ui/.streamlit/config.toml → ui/.streamlit/config.toml
diff --git a/ui/streamlit_ui/README.md → ui/README.md b/ui/streamlit_ui/README.md → ui/README.md
diff --git a/ui/streamlit_ui/__init__.py → ui/__init__.py b/ui/streamlit_ui/__init__.py → ui/__init__.py
diff --git a/ui/streamlit_ui/app.py → ui/app.py b/ui/streamlit_ui/app.py → ui/app.py
diff --git a/ui/streamlit_ui/css/style.css → ui/css/style.css b/ui/streamlit_ui/css/style.css → ui/css/style.css
diff --git a/ui/streamlit_ui/modal_streamlit_app.py → ui/modal_streamlit_app.py b/ui/streamlit_ui/modal_streamlit_app.py → ui/modal_streamlit_app.py
@@ -18,7 +18,8 @@
     )
     .pip_install("streamlit~=1.40.2", "uv")
     .run_commands(
-        "rm -rf sotopia && git clone https://github.com/sotopia-lab/sotopia.git && cd sotopia && git checkout demo && uv pip install pyproject.toml --system && pip install -e . && cd ui/streamlit_ui"
+        "rm -rf sotopia && git clone https://github.com/sotopia-lab/sotopia.git && cd sotopia && git checkout demo && uv pip install pyproject.toml --system && pip install -e . && cd ui/streamlit_ui",
+        force_build=True,
     )
     # .pip_install("pydantic==2.8.2")
     .run_commands("pip list")

diff --git a/ui/streamlit_ui/pages/__init__.py → ui/pages/__init__.py b/ui/streamlit_ui/pages/__init__.py → ui/pages/__init__.py
diff --git a/ui/streamlit_ui/pages/add_characters.py → ui/pages/add_characters.py b/ui/streamlit_ui/pages/add_characters.py → ui/pages/add_characters.py
@@ -1,19 +1,8 @@
-"""
-Definition
-@app.post("/agents/", response_model=str)
-async def create_agent(agent: BaseAgentProfile) -> str:
-    agent_profile = BaseAgentProfile(**agent.model_dump())
-    agent_profile.save()
-    pk = agent_profile.pk
-    assert pk is not None
-    return pk
-"""
-
 import streamlit as st
 import requests
 
 from sotopia.database import BaseAgentProfile
-from ui.streamlit_ui.rendering import local_css
+from ui.rendering import local_css
 
 # add fields for agent profiles
 
@@ -45,7 +34,7 @@ def rendering_character_form() -> None:
         print(agent_profile)
 
         response = requests.post(
-            f"{st.session_state.API_BASE}/agents/",
+            f"{st.session_state.API_BASE}/agents",
             json=agent_profile.model_dump(),
         )
 

diff --git a/...mlit_ui/pages/add_evaluation_dimension.py → ui/pages/add_evaluation_dimension.py b/...mlit_ui/pages/add_evaluation_dimension.py → ui/pages/add_evaluation_dimension.py
@@ -1,42 +1,7 @@
-"""
-Definition
-@app.get(
-    "/evaluation_dimensions/", response_model=dict[str, list[CustomEvaluationDimension]]
-)
-async def get_evaluation_dimensions() -> dict[str, list[CustomEvaluationDimension]]:
-    custom_evaluation_dimensions: dict[str, list[CustomEvaluationDimension]] = {}
-    all_custom_evaluation_dimension_list = CustomEvaluationDimensionList.all()
-    for custom_evaluation_dimension_list in all_custom_evaluation_dimension_list:
-        assert isinstance(
-            custom_evaluation_dimension_list, CustomEvaluationDimensionList
-        )
-        custom_evaluation_dimensions[custom_evaluation_dimension_list.name] = [
-            CustomEvaluationDimension.get(pk=pk)
-            for pk in custom_evaluation_dimension_list.dimension_pks
-        ]
-    return custom_evaluation_dimensions
-
-class CustomEvaluationDimensionsWrapper(BaseModel):
-    pk: str = ""
-    name: str = Field(
-        default="", description="The name of the custom evaluation dimension list"
-    )
-    dimensions: list[CustomEvaluationDimension] = Field(
-        default=[], description="The dimensions of the custom evaluation dimension list"
-    )
-
-class CustomEvaluationDimension(JsonModel):
-    name: str = Field(index=True)
-    description: str = Field(index=True)
-    range_high: int = Field(index=True)
-    range_low: int = Field(index=True)
-
-"""
-
 import streamlit as st
 import requests
 from sotopia.database import BaseCustomEvaluationDimension
-from ui.streamlit_ui.rendering.get_elements import (
+from ui.rendering.get_elements import (
     get_distinct_evaluation_dimensions,
 )
 
@@ -161,7 +126,7 @@ def add_evaluation_dimension() -> None:
                     # st.write(wrapper.dict())
 
                     response = requests.post(
-                        f"{st.session_state.API_BASE}/evaluation_dimensions/",
+                        f"{st.session_state.API_BASE}/evaluation_dimensions",
                         json=wrapper.dict(),
                     )