Skip to content

Commit

Permalink
Merge branch 'sotopia-lab:demo' into demo
Browse files Browse the repository at this point in the history
  • Loading branch information
JXZhou0224 authored Jan 7, 2025
2 parents a3abfee + 6b2db2a commit 038ffa3
Show file tree
Hide file tree
Showing 26 changed files with 183 additions and 154 deletions.
1 change: 1 addition & 0 deletions .github/.codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ ignore:
- ".github" # ignore the .github directory
- "docs" # ignore the docs directory
- "figs" # ignore the figs directory
- "ui" # ignore the ui directory

coverage:
status:
Expand Down
19 changes: 1 addition & 18 deletions examples/use_custom_dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,24 +192,7 @@ def run_simple_sample_with_custom_samples(


if __name__ == "__main__":
"""
A sample dimension:
custom_dimensions: list[dict[str, Union[str, int]]] = [
{
"name": "transactivity",
"description": "Analyze the provided social interaction episode between the given pair/team, focusing on identifying instances of transactive exchanges. Evaluate the level of transactivity by considering the following aspects: elaboration, building upon ideas, questioning, argumentation. Analyze whether these transactive patterns persist consistently across the entire interaction or if there are notable variations throughout the exchange. In the 'reasoning' field, provide a comprehensive account of the logic and thought process that led to your conclusion. Consider how the observed instances of transactivity contribute to or detract from the overall quality and depth of the interaction. In the 'score' field, provide an integer score ranging from 0 to 10, where a higher score indicates a higher level of transactivity.",
"range_high": 10,
"range_low": 0,
},
{
"name": "verbal_equity",
"description": "Analyze the script and measure the level of verbal equity reflected in the interaction between the agents. And then analyze the extent to which the interaction shows a balanced distribution of speaking opportunities among team members. In the 'reasoning' field, provide a comprehensive account of the logic or thought process that led you to your conclusion. Further, provide an integer score ranging from 0 and 10 in the 'score' field. A higher score indicates a higher level of verbal equity.",
"range_high": 10,
"range_low": 0,
},
]
"""

# here is a sample dimension
custom_dimensions: list[dict[str, Union[str, int]]] = [
{
"name": "transactivity",
Expand Down
21 changes: 16 additions & 5 deletions sotopia/api/fastapi_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ async def get_evaluation_dimensions() -> dict[str, list[CustomEvaluationDimensio
CustomEvaluationDimension.get(pk=pk)
for pk in custom_evaluation_dimension_list.dimension_pks
]
print(custom_evaluation_dimensions)
return custom_evaluation_dimensions


Expand Down Expand Up @@ -436,31 +437,31 @@ def setup_routes(self) -> None:
response_model=dict[str, list[CustomEvaluationDimension]],
)(get_evaluation_dimensions)

@self.post("/scenarios/", response_model=str)
@self.post("/scenarios", response_model=str)
async def create_scenario(scenario: BaseEnvironmentProfile) -> str:
scenario_profile = EnvironmentProfile(**scenario.model_dump())
scenario_profile.save()
pk = scenario_profile.pk
assert pk is not None
return pk

@self.post("/agents/", response_model=str)
@self.post("/agents", response_model=str)
async def create_agent(agent: BaseAgentProfile) -> str:
agent_profile = AgentProfile(**agent.model_dump())
agent_profile.save()
pk = agent_profile.pk
assert pk is not None
return pk

@self.post("/relationship/", response_model=str)
@self.post("/relationship", response_model=str)
async def create_relationship(relationship: BaseRelationshipProfile) -> str:
relationship_profile = RelationshipProfile(**relationship.model_dump())
relationship_profile.save()
pk = relationship_profile.pk
assert pk is not None
return pk

@self.post("/evaluation_dimensions/", response_model=str)
@self.post("/evaluation_dimensions", response_model=str)
async def create_evaluation_dimensions(
evaluation_dimensions: CustomEvaluationDimensionsWrapper,
) -> str:
Expand Down Expand Up @@ -504,7 +505,7 @@ async def create_evaluation_dimensions(
assert pk is not None
return pk

@self.post("/simulate/", response_model=str)
@self.post("/simulate", response_model=str)
def simulate(simulation_request: SimulationRequest) -> Response:
try:
_: EnvironmentProfile = EnvironmentProfile.get(
Expand Down Expand Up @@ -601,6 +602,16 @@ async def delete_episode(episode_id: str) -> str:
EpisodeLog.delete(episode_id)
return episode_id

# DELETE endpoint: hands the path parameter to
# CustomEvaluationDimensionList.delete and echoes the same value back.
# NOTE(review): the value is forwarded to delete() unchanged -- confirm
# whether clients send the list's pk or its name here.
@self.delete(
    "/evaluation_dimensions/{evaluation_dimension_list_name}",
    response_model=str,
)
async def delete_evaluation_dimension_list(
    evaluation_dimension_list_name: str,
) -> str:
    target = evaluation_dimension_list_name
    CustomEvaluationDimensionList.delete(target)
    return target

@self.websocket("/ws/simulation")
async def websocket_endpoint(websocket: WebSocket, token: str) -> None:
manager = SimulationManager()
Expand Down
11 changes: 7 additions & 4 deletions sotopia/database/evaluation_dimensions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from redis_om import JsonModel
from redis_om.model.model import Field
from pydantic import create_model, BaseModel
from pydantic import create_model, BaseModel, AfterValidator
from typing import Type, Callable, Tuple, Annotated, Union, cast, Any


Expand Down Expand Up @@ -56,7 +56,8 @@ def build_dimension_model(dimension_ids: list[str]) -> Type[BaseModel]:
range_validator = EvaluationDimensionBuilder.create_range_validator(
dimension.range_low, dimension.range_high
)
field_type = Annotated[Tuple[str, int], range_validator]
# Need to use AfterValidator to ensure validation happens after type checking
field_type = Annotated[Tuple[str, int], AfterValidator(range_validator)]

fields[dimension.name] = (
field_type,
Expand Down Expand Up @@ -84,7 +85,8 @@ def build_dimension_model_from_dict(
range_validator = EvaluationDimensionBuilder.create_range_validator(
dimension.range_low, dimension.range_high
)
field_type = Annotated[Tuple[str, int], range_validator]
# Need to use AfterValidator to ensure validation happens after type checking
field_type = Annotated[Tuple[str, int], AfterValidator(range_validator)]

fields[dimension.name] = (
field_type,
Expand Down Expand Up @@ -118,7 +120,8 @@ def select_existing_dimension_model_by_name(
range_validator = EvaluationDimensionBuilder.create_range_validator(
dimension.range_low, dimension.range_high
)
field_type = Annotated[Tuple[str, int], range_validator]
# Need to use AfterValidator to ensure validation happens after type checking
field_type = Annotated[Tuple[str, int], AfterValidator(range_validator)]

fields[dimension.name] = (
field_type,
Expand Down
6 changes: 3 additions & 3 deletions tests/api/test_fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def test_get_relationship(create_mock_data: Callable[[], None]) -> None:


def test_get_evaluation_dimensions(create_mock_data: Callable[[], None]) -> None:
response = client.get("/evaluation_dimensions/")
response = client.get("/evaluation_dimensions")
assert response.status_code == 200
assert isinstance(response.json(), dict)
assert response.json()["test_dimension_list"][0]["name"] == "test_dimension"
Expand All @@ -264,7 +264,7 @@ def test_create_agent(create_mock_data: Callable[[], None]) -> None:
"first_name": "test_first_name",
"last_name": "test_last_name",
}
response = client.post("/agents/", json=agent_data)
response = client.post("/agents", json=agent_data)
assert response.status_code == 200
assert isinstance(response.json(), str)

Expand All @@ -277,7 +277,7 @@ def test_create_scenario(create_mock_data: Callable[[], None]) -> None:
"scenario": "test_scenario",
"tag": "test",
}
response = client.post("/scenarios/", json=scenario_data)
response = client.post("/scenarios", json=scenario_data)
assert response.status_code == 200
assert isinstance(response.json(), str)

Expand Down
113 changes: 113 additions & 0 deletions tests/database/test_evaluation_dimensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import pytest
from pydantic import ValidationError
from sotopia.database.evaluation_dimensions import (
EvaluationDimensionBuilder,
CustomEvaluationDimension,
CustomEvaluationDimensionList,
)
from typing import Generator, Callable


def test_create_range_validator() -> None:
    """Exercise the validator built for the range 0 to 10.

    A score of 5 must come back equal to the input; scores of 11 and -1
    must raise ``ValueError``.
    """
    check_range = EvaluationDimensionBuilder.create_range_validator(0, 10)

    accepted = check_range(("test", 5))
    assert accepted == ("test", 5)

    # One value above the range and one below it, checked in that order.
    for rejected in (("test", 11), ("test", -1)):
        with pytest.raises(ValueError):
            check_range(rejected)


@pytest.fixture
def test_dimension() -> Generator[None, None, None]:
    """Save a temporary ``CustomEvaluationDimension`` and delete it afterwards.

    The record uses pk ``tmppk_test_dimension`` and a score range of 0 to 10.
    """
    attributes = {
        "pk": "tmppk_test_dimension",
        "name": "test_dimension",
        "description": "A test dimension",
        "range_high": 10,
        "range_low": 0,
    }
    record = CustomEvaluationDimension(**attributes)
    record.save()

    yield

    # Teardown: remove the record saved above.
    record.delete(record.pk)


@pytest.fixture
def test_dimension_list() -> Generator[None, None, None]:
    """Save a temporary dimension plus a list that references it by pk.

    Both records are deleted after the test, the dimension first and the
    list second.
    """
    dimension_pk = "tmppk_test_dimension"
    list_pk = "tmppk_test_dimension_list"

    record = CustomEvaluationDimension(
        pk=dimension_pk,
        name="test_dimension",
        description="A test dimension",
        range_high=10,
        range_low=0,
    )
    record.save()

    record_list = CustomEvaluationDimensionList(
        pk=list_pk,
        name="test_list",
        dimension_pks=[dimension_pk],
    )
    record_list.save()

    yield

    # Teardown: remove both records saved above.
    record.delete(dimension_pk)
    record_list.delete(list_pk)


def test_build_dimension_model(test_dimension: Callable[[], None]) -> None:
    """Build a model from a stored dimension pk and check its score range.

    Uses the ``test_dimension`` fixture, which saves a dimension named
    ``test_dimension`` under pk ``tmppk_test_dimension`` with range 0 to 10.
    """
    model = EvaluationDimensionBuilder.build_dimension_model(["tmppk_test_dimension"])

    instance = model(test_dimension=("example", 5))
    # model_dump() is the pydantic v2 API; .dict() is deprecated there and
    # emits a deprecation warning.
    assert instance.model_dump()["test_dimension"] == ("example", 5)

    # Scores outside the configured range must fail model validation.
    with pytest.raises(ValidationError):
        model(test_dimension=("example", 11))
    with pytest.raises(ValidationError):
        model(test_dimension=("example", -1))


def test_build_dimension_model_from_dict() -> None:
    """Build a model from an in-memory dimension dict and check its score range.

    The single dimension is named ``test_dim`` and has range 0 to 10.
    """
    dimensions: list[dict[str, str | int]] = [
        {
            "name": "test_dim",
            "description": "A test dimension",
            "range_high": 10,
            "range_low": 0,
        }
    ]
    model = EvaluationDimensionBuilder.build_dimension_model_from_dict(dimensions)

    instance = model(test_dim=("example", 5))
    # model_dump() is the pydantic v2 API; .dict() is deprecated there and
    # emits a deprecation warning.
    assert instance.model_dump()["test_dim"] == ("example", 5)

    # Check both sides of the range, matching test_build_dimension_model.
    with pytest.raises(ValidationError):
        model(test_dim=("example", 11))
    with pytest.raises(ValidationError):
        model(test_dim=("example", -1))


def test_select_existing_dimension_model_by_name(
    test_dimension: Callable[[], None],
) -> None:
    """Build a model by dimension name and check its score range.

    Uses the ``test_dimension`` fixture, which saves a dimension named
    ``test_dimension`` with range 0 to 10.
    """
    model = EvaluationDimensionBuilder.select_existing_dimension_model_by_name(
        ["test_dimension"]
    )

    instance = model(test_dimension=("example", 5))
    # model_dump() is the pydantic v2 API; .dict() is deprecated there and
    # emits a deprecation warning.
    assert instance.model_dump()["test_dimension"] == ("example", 5)

    # Check both sides of the range, matching test_build_dimension_model.
    with pytest.raises(ValidationError):
        model(test_dimension=("example", 11))
    with pytest.raises(ValidationError):
        model(test_dimension=("example", -1))


def test_select_existing_dimension_model_by_list_name(
    test_dimension_list: Callable[[], None],
) -> None:
    """Build a model from a stored dimension list name and check the upper bound.

    Uses the ``test_dimension_list`` fixture, which saves a list named
    ``test_list`` referencing a dimension named ``test_dimension`` whose
    range_high is 10.
    """
    model = EvaluationDimensionBuilder.select_existing_dimension_model_by_list_name(
        "test_list"
    )

    instance = model(test_dimension=("example", 5))
    # model_dump() is the pydantic v2 API; .dict() is deprecated there and
    # emits a deprecation warning.
    assert instance.model_dump()["test_dimension"] == ("example", 5)

    with pytest.raises(ValidationError):
        model(test_dimension=("example", 11))
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
)
.pip_install("streamlit~=1.40.2", "uv")
.run_commands(
"rm -rf sotopia && git clone https://github.com/sotopia-lab/sotopia.git && cd sotopia && git checkout demo && uv pip install pyproject.toml --system && pip install -e . && cd ui/streamlit_ui"
"rm -rf sotopia && git clone https://github.com/sotopia-lab/sotopia.git && cd sotopia && git checkout demo && uv pip install pyproject.toml --system && pip install -e . && cd ui/streamlit_ui",
force_build=True,
)
# .pip_install("pydantic==2.8.2")
.run_commands("pip list")
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,19 +1,8 @@
"""
Definition
@app.post("/agents/", response_model=str)
async def create_agent(agent: BaseAgentProfile) -> str:
agent_profile = BaseAgentProfile(**agent.model_dump())
agent_profile.save()
pk = agent_profile.pk
assert pk is not None
return pk
"""

import streamlit as st
import requests

from sotopia.database import BaseAgentProfile
from ui.streamlit_ui.rendering import local_css
from ui.rendering import local_css

# add fields for agent profiles

Expand Down Expand Up @@ -45,7 +34,7 @@ def rendering_character_form() -> None:
print(agent_profile)

response = requests.post(
f"{st.session_state.API_BASE}/agents/",
f"{st.session_state.API_BASE}/agents",
json=agent_profile.model_dump(),
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,42 +1,7 @@
"""
Definition
@app.get(
"/evaluation_dimensions/", response_model=dict[str, list[CustomEvaluationDimension]]
)
async def get_evaluation_dimensions() -> dict[str, list[CustomEvaluationDimension]]:
custom_evaluation_dimensions: dict[str, list[CustomEvaluationDimension]] = {}
all_custom_evaluation_dimension_list = CustomEvaluationDimensionList.all()
for custom_evaluation_dimension_list in all_custom_evaluation_dimension_list:
assert isinstance(
custom_evaluation_dimension_list, CustomEvaluationDimensionList
)
custom_evaluation_dimensions[custom_evaluation_dimension_list.name] = [
CustomEvaluationDimension.get(pk=pk)
for pk in custom_evaluation_dimension_list.dimension_pks
]
return custom_evaluation_dimensions
class CustomEvaluationDimensionsWrapper(BaseModel):
pk: str = ""
name: str = Field(
default="", description="The name of the custom evaluation dimension list"
)
dimensions: list[CustomEvaluationDimension] = Field(
default=[], description="The dimensions of the custom evaluation dimension list"
)
class CustomEvaluationDimension(JsonModel):
name: str = Field(index=True)
description: str = Field(index=True)
range_high: int = Field(index=True)
range_low: int = Field(index=True)
"""

import streamlit as st
import requests
from sotopia.database import BaseCustomEvaluationDimension
from ui.streamlit_ui.rendering.get_elements import (
from ui.rendering.get_elements import (
get_distinct_evaluation_dimensions,
)

Expand Down Expand Up @@ -161,7 +126,7 @@ def add_evaluation_dimension() -> None:
# st.write(wrapper.dict())

response = requests.post(
f"{st.session_state.API_BASE}/evaluation_dimensions/",
f"{st.session_state.API_BASE}/evaluation_dimensions",
json=wrapper.dict(),
)

Expand Down
Loading

0 comments on commit 038ffa3

Please sign in to comment.