fix descriptor columns (#1440)
mike0sv authored Jan 23, 2025
1 parent f185c62 commit b9ad296
Showing 7 changed files with 51 additions and 44 deletions.
5 changes: 4 additions & 1 deletion src/evidently/base_metric.py
@@ -77,13 +77,16 @@ class DatasetType(Enum):
ADDITIONAL = "additional"


DisplayName = str


@autoregister
class ColumnName(EnumValueMixin, EvidentlyBaseModel):
class Config:
type_alias = "evidently:base:ColumnName"

name: str
display_name: str
display_name: DisplayName
dataset: DatasetType
_feature_class: Optional["GeneratedFeatures"] = PrivateAttr(None)

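The DisplayName alias introduced above is a plain str at runtime; it only documents which fields hold human-readable labels. A minimal sketch of how a ColumnName carries it, assuming the pydantic model accepts keyword arguments for its declared fields (the fingerprint-style name below is made up):

from evidently.base_metric import ColumnName, DatasetType

col = ColumnName(
    name="a1b2c3d4.score",          # physical column key, hypothetical value
    display_name="Toxicity score",  # human-readable DisplayName shown in reports
    dataset=DatasetType.ADDITIONAL,
)
assert isinstance(col.display_name, str)  # DisplayName = str, so nothing changes at runtime
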
2 changes: 1 addition & 1 deletion src/evidently/features/generated_features.py
@@ -93,7 +93,7 @@ def feature_name(self, subcolumn: Optional[str] = None):
return self.as_column(subcolumn)

def _create_column_name(self, subcolumn: Optional[str]) -> str:
subcolumn = f".{subcolumn}" if subcolumn is not None else ""
subcolumn = f".{subcolumn}" if subcolumn is not None and len(subcolumn) > 0 else ""
return f"{self.get_fingerprint()}{subcolumn}"

def _extract_subcolumn_name(self, column_name: str) -> Optional[str]:
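The guard above now treats an empty subcolumn the same as None, so a feature whose output column is blank maps to the bare fingerprint instead of a name with a trailing dot. A standalone re-statement of the rule (the fingerprint value is made up for illustration):

from typing import Optional

def create_column_name(fingerprint: str, subcolumn: Optional[str]) -> str:
    suffix = f".{subcolumn}" if subcolumn is not None and len(subcolumn) > 0 else ""
    return f"{fingerprint}{suffix}"

assert create_column_name("a1b2c3d4", "score") == "a1b2c3d4.score"
assert create_column_name("a1b2c3d4", "") == "a1b2c3d4"    # previously produced "a1b2c3d4."
assert create_column_name("a1b2c3d4", None) == "a1b2c3d4"
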
6 changes: 3 additions & 3 deletions src/evidently/features/llm_judge.py
@@ -76,7 +76,7 @@ class Config:
include_score: bool = False
score_range: Tuple[float, float] = (0.0, 1.0)

output_column: str = "category"
output_column: str = ""
output_reasoning_column: str = "reasoning"
output_score_column: str = "score"

@@ -153,7 +153,7 @@ def get_type(self, subcolumn: Optional[str]) -> ColumnType:
return ColumnType.Text
if subcolumn == self.output_score_column:
return ColumnType.Numerical
if subcolumn == self.output_column:
if subcolumn == self.output_column or subcolumn is None:
return ColumnType.Categorical
raise ValueError(f"Unknown subcolumn {subcolumn}")

@@ -196,7 +196,7 @@ def generate_features(self, data: pd.DataFrame, data_definition: DataDefinition,

def list_columns(self) -> List["ColumnName"]:
return [
self._create_column(c, display_name=f"{self.display_name or ''} {c}")
self._create_column(c, display_name=f"{self.display_name or ''} {c}".strip())
for c in self.template.list_output_columns()
]

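With output_column defaulting to "", the judge's main category output now lives on the bare feature column (no subcolumn suffix), and get_type resolves both "" and None to categorical. A standalone sketch of that rule, with ColumnType swapped for plain strings so it runs without evidently installed:

def get_type(subcolumn, output_column="", reasoning_column="reasoning", score_column="score"):
    if subcolumn == reasoning_column:
        return "Text"
    if subcolumn == score_column:
        return "Numerical"
    if subcolumn == output_column or subcolumn is None:
        return "Categorical"
    raise ValueError(f"Unknown subcolumn {subcolumn}")

assert get_type(None) == "Categorical"     # bare fingerprint column is the category output
assert get_type("") == "Categorical"
assert get_type("score") == "Numerical"
assert get_type("reasoning") == "Text"

The .strip() added in list_columns keeps the resulting display name clean when either part is empty, which is why the snapshot test below now expects "Negativity" rather than "Negativity category".
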
31 changes: 14 additions & 17 deletions src/evidently/future/datasets.py
@@ -16,6 +16,7 @@

from evidently import ColumnMapping
from evidently import ColumnType
from evidently.base_metric import DisplayName
from evidently.features.generated_features import GeneratedFeatures
from evidently.metric_results import Label
from evidently.options.base import Options
@@ -177,7 +178,7 @@ def __init__(self, alias: str):
self._alias = alias

@abc.abstractmethod
def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[DisplayName, DatasetColumn]]:
raise NotImplementedError()

@property
@@ -192,22 +193,22 @@ def __init__(self, feature: GeneratedFeatures, alias: Optional[str] = None):
self._feature = feature

def get_dataset_column(self, column_name: str, values: pd.Series) -> DatasetColumn:
column_type = self._feature.get_type(f"{self._feature.get_fingerprint()}.{column_name}")
column_type = self._feature.get_type(column_name)
if column_type == ColumnType.Numerical:
values = pd.to_numeric(values, errors="coerce")
dataset_column = DatasetColumn(type=column_type, data=values)
return dataset_column

def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
feature = self._feature.generate_features(
def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[DisplayName, DatasetColumn]]:
feature = self._feature.generate_features_renamed(
dataset.as_dataframe(),
create_data_definition(None, dataset.as_dataframe(), ColumnMapping()),
Options(),
)
if len(feature.columns) > 1:
return {col: self.get_dataset_column(col, feature[col]) for col in feature.columns}
col = feature.columns[0]
return self.get_dataset_column(col, feature[col])
return {
col.display_name: self.get_dataset_column(col.name, feature[col.name])
for col in self._feature.list_columns()
}


def _determine_desccriptor_column_name(alias: str, columns: List[str]):
@@ -361,15 +362,11 @@ def add_column(self, key: str, data: DatasetColumn):
self._data_definition.categorical_descriptors.append(key)

def add_descriptor(self, descriptor: Descriptor):
key = _determine_desccriptor_column_name(descriptor.alias, self._data.columns.tolist())
new_column = descriptor.generate_data(self)
if isinstance(new_column, DatasetColumn):
self.add_column(key, new_column)
elif len(new_column) > 1:
for col, value in new_column.items():
self.add_column(f"{key}.{col}", value)
else:
self.add_column(key, list(new_column.values())[0])
new_columns = descriptor.generate_data(self)
if isinstance(new_columns, DatasetColumn):
new_columns = {descriptor.alias: new_columns}
for col, value in new_columns.items():
self.add_column(_determine_desccriptor_column_name(col, self._data.columns.tolist()), value)

def add_descriptors(self, descriptors: List[Descriptor]):
for descriptor in descriptors:
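add_descriptor above now normalizes the single-column case into a one-entry dict keyed by the descriptor's alias, then derives each physical column name from the display name. A toy illustration of that normalization (plain lists stand in for DatasetColumn):

def normalize(generated, alias):
    # A descriptor may return one column or a dict keyed by display name.
    if not isinstance(generated, dict):
        generated = {alias: generated}
    return generated

assert normalize([0, 1], "Negativity") == {"Negativity": [0, 1]}
assert normalize({"Negativity": [0, 1], "Negativity score": [0.1, 0.9]}, "Negativity") == {
    "Negativity": [0, 1],
    "Negativity score": [0.1, 0.9],
}
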
4 changes: 3 additions & 1 deletion src/evidently/future/descriptors/_generate_descriptors.py
@@ -131,7 +131,7 @@ def create_llm_descriptor_functions(feature_class: Type[BaseLLMEval]):

args, kwargs = get_args_kwargs(feature_class)
kwargs["alias"] = ("Optional[str]", "None")
kwargs.pop("display_name", None)
has_display_name = kwargs.pop("display_name", None) is not None
args_str = ", ".join(f"{a}: {t}" for a, t in args.items())
if len(kwargs) > 0:
kwargs_str = ", ".join(f"{a}: {t} = {d}" for a, (t, d) in kwargs.items())
@@ -140,6 +140,8 @@ else:
else:
kwargs_str = ""
class_args = ", ".join(f"{k}={k}" for k in chain(args, kwargs) if k != "alias")
if has_display_name:
class_args += ", display_name=alias"
res = f"""
class {name}(FeatureDescriptor):
def __init__(self, column_name: str, {args_str}{kwargs_str}):
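The generator change above forwards display_name=alias into the wrapped feature whenever the feature class declares a display_name parameter. A toy version of the argument-string assembly (simplified; the real generator also tracks type annotations and defaults):

from itertools import chain

args = {"column_name": "str"}
kwargs = {"provider": ("str", '"openai"'), "model": ("str", '"gpt-4o-mini"')}
has_display_name = True

class_args = ", ".join(f"{k}={k}" for k in chain(args, kwargs) if k != "alias")
if has_display_name:
    class_args += ", display_name=alias"

assert class_args == "column_name=column_name, provider=provider, model=model, display_name=alias"
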
39 changes: 22 additions & 17 deletions src/evidently/future/descriptors/generated_descriptors.py
@@ -8,9 +8,6 @@
from evidently.features.llm_judge import Uncertainty
from evidently.future.datasets import FeatureDescriptor

DEFAULT_LLM_PROVIDER = "openai"
DEFAULT_LLM_MODEL = "gpt-4o-mini"


class BERTScore(FeatureDescriptor):
def __init__(
@@ -373,8 +370,8 @@ class BiasLLMEval(FeatureDescriptor):
def __init__(
self,
column_name: str,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
provider: str = "openai",
model: str = "gpt-4o-mini",
additional_columns: Optional[Dict[str, str]] = None,
include_category: Optional[bool] = None,
include_score: Optional[bool] = None,
@@ -392,6 +389,7 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -419,6 +417,7 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -428,8 +427,8 @@ def __init__(
self,
column_name: str,
question: str,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
provider: str = "openai",
model: str = "gpt-4o-mini",
additional_columns: Optional[Dict[str, str]] = None,
include_category: Optional[bool] = None,
include_score: Optional[bool] = None,
Expand All @@ -448,6 +447,7 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -456,8 +456,8 @@ class DeclineLLMEval(FeatureDescriptor):
def __init__(
self,
column_name: str,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
provider: str = "openai",
model: str = "gpt-4o-mini",
additional_columns: Optional[Dict[str, str]] = None,
include_category: Optional[bool] = None,
include_score: Optional[bool] = None,
Expand All @@ -475,6 +475,7 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -483,9 +484,9 @@ class LLMEval(FeatureDescriptor):
def __init__(
self,
column_name: str,
provider: str,
model: str,
template: BaseLLMPromptTemplate,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
additional_columns: Optional[Dict[str, str]] = None,
subcolumn: Optional[str] = None,
alias: Optional[str] = None,
Expand All @@ -498,6 +499,7 @@ def __init__(
template=template,
additional_columns=additional_columns,
subcolumn=subcolumn,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -506,8 +508,8 @@ class NegativityLLMEval(FeatureDescriptor):
def __init__(
self,
column_name: str,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
provider: str = "openai",
model: str = "gpt-4o-mini",
additional_columns: Optional[Dict[str, str]] = None,
include_category: Optional[bool] = None,
include_score: Optional[bool] = None,
Expand All @@ -525,6 +527,7 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -533,8 +536,8 @@ class PIILLMEval(FeatureDescriptor):
def __init__(
self,
column_name: str,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
provider: str = "openai",
model: str = "gpt-4o-mini",
additional_columns: Optional[Dict[str, str]] = None,
include_category: Optional[bool] = None,
include_score: Optional[bool] = None,
Expand All @@ -552,6 +555,7 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)

@@ -560,8 +564,8 @@ class ToxicityLLMEval(FeatureDescriptor):
def __init__(
self,
column_name: str,
provider: str = DEFAULT_LLM_PROVIDER,
model: str = DEFAULT_LLM_MODEL,
provider: str = "openai",
model: str = "gpt-4o-mini",
additional_columns: Optional[Dict[str, str]] = None,
include_category: Optional[bool] = None,
include_score: Optional[bool] = None,
Expand All @@ -579,5 +583,6 @@ def __init__(
include_score=include_score,
include_reasoning=include_reasoning,
uncertainty=uncertainty,
display_name=alias,
).feature(column_name)
super().__init__(feature, alias=alias)
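With display_name=alias forwarded by every generated wrapper above, the alias now drives the labels of the columns a descriptor produces. A hedged usage sketch (the import path assumes these classes are re-exported from evidently.future.descriptors, and an LLM provider key is configured):

from evidently.future.descriptors import NegativityLLMEval  # import path is an assumption

descriptor = NegativityLLMEval("response", alias="Negativity")
# The generated category column is now labeled "Negativity" (plus "Negativity reasoning" /
# "Negativity score" when those outputs are enabled) instead of "Negativity category".
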
8 changes: 4 additions & 4 deletions tests/features/test_llm_judge.py
@@ -96,7 +96,7 @@ def __init__(self, model: str, options: Options):
async def complete(self, messages: List[LLMMessage]) -> str:
text = messages[-1].content
cat = re.findall("___text_starts_here___\n(.*)\n___text_ends_here___", text)[0][0]
return json.dumps({"category": cat})
return json.dumps({"": cat})


@pytest.mark.asyncio
@@ -112,7 +112,7 @@ def test_llm_judge():

dd = DataDefinition(columns={}, reference_present=False)
fts = llm_judge.generate_features(data, dd, Options())
pd.testing.assert_frame_equal(fts, pd.DataFrame({"category": ["A", "B"]}))
pd.testing.assert_frame_equal(fts, pd.DataFrame({"": ["A", "B"]}))


@pytest.mark.asyncio
@@ -128,7 +128,7 @@ def test_multicol_llm_judge():

dd = DataDefinition(columns={}, reference_present=False)
fts = llm_judge.generate_features(data, dd, Options())
pd.testing.assert_frame_equal(fts, pd.DataFrame({"category": ["A", "B"]}))
pd.testing.assert_frame_equal(fts, pd.DataFrame({"": ["A", "B"]}))


def test_run_snapshot_with_llm_judge():
@@ -148,7 +148,7 @@ def test_run_snapshot_with_llm_judge():
{
"metric": "ColumnSummaryMetric",
"result": {
"column_name": "Negativity category",
"column_name": "Negativity",
"column_type": "cat",
"current_characteristics": {
"count": 2,
