Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise error when trying to resolve to display names with duplicates. #34

Merged
merged 6 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/errors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ Errors

.. autoclass:: ResamplingValidationError
:show-inheritance:

.. autoclass:: DuplicateDisplayNameError
:show-inheritance:
10 changes: 10 additions & 0 deletions src/enlyze/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,13 @@ class ResamplingValidationError(EnlyzeError):
resampling interval is specified.

"""


class DuplicateDisplayNameError(EnlyzeError):
    """Multiple variables share the same display name

    Variable UUIDs cannot be resolved to display names unambiguously because
    two or more of the requested variables have an identical display name. To
    resolve this, either give the affected variables distinct display names via
    the ENLYZE App or avoid requesting them at the same time.

    """
25 changes: 22 additions & 3 deletions src/enlyze/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pandas

from enlyze.errors import DuplicateDisplayNameError
from enlyze.schema import dataframe_ensure_schema


Expand Down Expand Up @@ -143,6 +144,19 @@ def _display_names_as_column_names(self, columns: list[str]) -> list[str]:
if var.display_name
}

distinct_display_names = set(uuid_to_display_name.values())
if len(uuid_to_display_name) != len(distinct_display_names):
maybe_duplicate_display_names = list(uuid_to_display_name.values())
for name in distinct_display_names:
maybe_duplicate_display_names.remove(name)

raise DuplicateDisplayNameError(
", ".join(
f"'{duplicate_display_name}'"
for duplicate_display_name in set(maybe_duplicate_display_names)
)
)

return [uuid_to_display_name.get(var_uuid, var_uuid) for var_uuid in columns]

def to_dicts(self, use_display_names: bool = False) -> Iterator[dict[str, Any]]:
Expand All @@ -153,12 +167,14 @@ def to_dicts(self, use_display_names: bool = False) -> Iterator[dict[str, Any]]:
<python:datetime-naive-aware>` :py:class:`datetime.datetime` localized in UTC.

:param use_display_names: Whether to return display names instead of variable
UUIDs. If there is no display name fall back to UUID.
UUIDs. If there is no display name, fall back to UUID.

:raises: :exc:`~enlyze.errors.DuplicateDisplayNameError` when duplicate
display names would be returned instead of UUIDs.

:returns: Iterator over rows

"""

time_column, *variable_columns = self._columns

if use_display_names:
Expand All @@ -181,7 +197,10 @@ def to_dataframe(self, use_display_names: bool = False) -> pandas.DataFrame:
represented as a column named by its UUID.

:param use_display_names: Whether to return display names instead of variable
UUIDs. If there is no display name fall back to UUID.
UUIDs. If there is no display name, fall back to UUID.

:raises: :exc:`~enlyze.errors.DuplicateDisplayNameError` when duplicate
display names would be returned instead of UUIDs.

:returns: DataFrame with timeseries data indexed by time

Expand Down
24 changes: 23 additions & 1 deletion tests/enlyze/test_models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from dataclasses import replace
from datetime import datetime
from uuid import uuid4

import hypothesis.strategies as st
import pytest
from hypothesis import given

from enlyze.models import ProductionRun, ProductionRuns
from enlyze.errors import DuplicateDisplayNameError
from enlyze.models import ProductionRun, ProductionRuns, TimeseriesData, Variable


@given(runs=st.lists(st.from_type(ProductionRun), max_size=10))
Expand All @@ -28,3 +32,21 @@ def test_production_runs_to_dataframe_no_empty_columns_for_optional_dataclasses(

assert "quantity_total" not in df.columns
assert "average_throughput" in df.columns


@given(variable=st.builds(Variable, display_name=st.text(min_size=1)))
def test_timeseries_data_duplicate_display_names(variable):
    """Resolving display names must fail when two variables share one."""
    # Copy the generated variable under a fresh UUID so that both entries
    # carry the same (non-empty) display name but remain distinct variables.
    twin = replace(variable, uuid=uuid4())
    both_variables = [variable, twin]
    column_names = ["time"] + [str(v.uuid) for v in both_variables]

    data = TimeseriesData(
        start=datetime.now(),
        end=datetime.now(),
        variables=both_variables,
        _columns=column_names,
        _records=[],
    )

    with pytest.raises(DuplicateDisplayNameError):
        data.to_dataframe(use_display_names=True)
Loading