From 992ebdab09d6e8990cb3ff4651174c98eaad0fdb Mon Sep 17 00:00:00 2001 From: Katrine Lie Holm <18213981+katrilh@users.noreply.github.com> Date: Mon, 7 Aug 2023 12:43:30 +0200 Subject: [PATCH] Diff production model (#116) * initial diff on asset type * relationships * deleted more relationships without targets and cont from cdf * abstraction of from_cdf complete, linking not * any order linking of assets * a bit of cleanups * generalizing * WIP formatting diff * format_value_added() * changelog, version bump --------- Co-authored-by: Katrine Holm --- CHANGELOG.md | 8 +- cognite/powerops/_version.py | 2 +- .../clients/shop/api/shop_results_api.py | 2 +- cognite/powerops/resync/models/base.py | 344 ++++++++++++++++-- .../powerops/resync/models/cdf_resources.py | 59 ++- cognite/powerops/resync/models/helpers.py | 134 +++++++ .../powerops/resync/models/market/dayahead.py | 11 + cognite/powerops/resync/models/production.py | 120 +++--- pyproject.toml | 2 +- scripts/diff_production_model.py | 12 +- 10 files changed, 592 insertions(+), 102 deletions(-) create mode 100644 cognite/powerops/resync/models/helpers.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e3d82222..572b1e6a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,12 @@ Changes are grouped as follows - `Fixed` for any bug fixes. - `Security` in case of vulnerabilities. +## [0.34.0] - 2023-08-03 + +### Added +* Ability to retrieve production data model from CDF with relationships to other resources. +* Ability to compare production data model in CDF with production data models generated by `resync`. + ## [0.33.1] - 2023-08-03 ### Changed @@ -45,7 +51,7 @@ Changes are grouped as follows ### Added -* Validation that all timeseries exists in CDF before creating relatioinships to them. +* Validation that all timeseries exists in CDF before creating relationships to them. ## [0.29.0] - 2023-07-20 diff --git a/cognite/powerops/_version.py b/cognite/powerops/_version.py index 49f85f29f..eab0e9cdf 100644 --- a/cognite/powerops/_version.py +++ b/cognite/powerops/_version.py @@ -1 +1 @@ -__version__ = "0.33.1" +__version__ = "0.34.0" diff --git a/cognite/powerops/clients/shop/api/shop_results_api.py b/cognite/powerops/clients/shop/api/shop_results_api.py index c895e02ef..853640791 100644 --- a/cognite/powerops/clients/shop/api/shop_results_api.py +++ b/cognite/powerops/clients/shop/api/shop_results_api.py @@ -54,7 +54,7 @@ def retrieve_objective_function(self, shop_run: ShopRun) -> ObjectiveFunction: relationships = retrieve_relationships_from_source_ext_id( self._client, shop_run.shop_run_event.external_id, - RelationshipLabel.OBJECTIVE_SEQUENCE.value, + RelationshipLabel.OBJECTIVE_SEQUENCE, target_types=["sequence"], ) sequences = self._client.sequences.retrieve_multiple(external_ids=[r.target_external_id for r in relationships]) diff --git a/cognite/powerops/resync/models/base.py b/cognite/powerops/resync/models/base.py index eee46da83..d291e0f8b 100644 --- a/cognite/powerops/resync/models/base.py +++ b/cognite/powerops/resync/models/base.py @@ -3,19 +3,29 @@ import json from abc import ABC -from typing import ClassVar, Iterable, Optional, Union, TypeVar, get_args +from deepdiff import DeepDiff +from deepdiff.model import PrettyOrderedSet + +from pathlib import Path +from typing import Any, ClassVar, Iterable, Optional, Union, TypeVar, get_args from typing import Type as TypingType -from types import GenericAlias +from pydantic import BaseModel, ConfigDict from cognite.client import CogniteClient from cognite.client.data_classes import Asset, Label, Relationship, TimeSeries from cognite.client.data_classes.data_modeling.instances import EdgeApply, NodeApply -from pydantic import BaseModel, ConfigDict from cognite.powerops.cdf_labels import AssetLabel, RelationshipLabel from cognite.powerops.clients.data_classes._core import DomainModelApply, InstancesApply from cognite.powerops.resync.models.cdf_resources import CDFFile, CDFSequence - +from cognite.powerops.resync.models.helpers import ( + format_change_binary, + format_value_added, + format_value_removed, + isinstance_list, + match_field_from_relationship, + pydantic_model_class_candidate, +) _T_Type = TypeVar("_T_Type") @@ -36,7 +46,7 @@ def _fields_of_type(self, type_: TypingType[_T_Type]) -> list[_T_Type]: value = getattr(self, field_name) if not value: continue - elif isinstance(value, list) and isinstance(value[0], type_): + elif isinstance_list(value, type_): output.extend(value) elif isinstance(value, type_): output.append(value) @@ -48,6 +58,8 @@ class AssetType(ResourceType, ABC): label: ClassVar[Union[AssetLabel, str]] model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) parent_description: ClassVar[Optional[str]] = None + _instantiated_assets: ClassVar[dict[str, AssetType]] = defaultdict(dict) + name: str description: Optional[str] = None _external_id: Optional[str] = None @@ -104,15 +116,22 @@ def relationships(self) -> list[Relationship]: value = getattr(self, field_name) if not value: continue - if isinstance(value, list) and value and isinstance(value[0], AssetType): - for target in value: - relationships.append(self._create_relationship(target.external_id, "ASSET", target.type_)) + if isinstance_list(value, AssetType): + relationships.extend( + self._create_relationship(target.external_id, "ASSET", target.type_) for target in value + ) elif isinstance(value, AssetType): target_type = value.type_ if self.type_ == "plant" and value.type_ == "reservoir": target_type = "inlet_reservoir" relationships.append(self._create_relationship(value.external_id, "ASSET", target_type)) - elif any(cdf_type in str(field.annotation) for cdf_type in [CDFSequence.__name__, TimeSeries.__name__]): + elif any( + cdf_type in str(field.annotation) + for cdf_type in [ + CDFSequence.__name__, + TimeSeries.__name__, + ] + ): if TimeSeries.__name__ in str(field.annotation): target_type = "TIMESERIES" elif CDFSequence.__name__ in str(field.annotation): @@ -121,10 +140,22 @@ def relationships(self) -> list[Relationship]: raise ValueError(f"Unexpected type {field.annotation}") if isinstance(value, list): - for target in value: - relationships.append(self._create_relationship(target.external_id, target_type, field_name)) + relationships.extend( + self._create_relationship( + target.external_id, + target_type, + field_name, + ) + for target in value + ) else: - relationships.append(self._create_relationship(value.external_id, target_type, field_name)) + relationships.append( + self._create_relationship( + value.external_id, + target_type, + field_name, + ) + ) return relationships def as_asset(self): @@ -137,11 +168,7 @@ def as_asset(self): ): continue value = getattr(self, field_name) - if ( - value is None - or isinstance(value, AssetType) - or (isinstance(value, list) and value and isinstance(value[0], AssetType)) - ): + if value is None or isinstance(value, AssetType) or isinstance_list(value, AssetType): continue if isinstance(value, list) and not value: continue @@ -196,10 +223,154 @@ def _create_relationship( ) @classmethod - def from_asset(cls: TypingType["T_Asset_Type"], asset: Asset) -> "T_Asset_Type": - "Not yet implemented: CDF relationships" + def _parse_asset_metadata(cls, metadata: dict[str, Any] = None) -> dict[str, Any]: raise NotImplementedError() + @classmethod + def _from_asset( + cls, + asset: Asset, + additional_fields: Optional[dict[str, Any]] = None, + ) -> T_Asset_Type: + if not additional_fields: + additional_fields = {} + metadata = cls._parse_asset_metadata(asset.metadata) + instance = cls( + _external_id=asset.external_id, + name=asset.name, + description=asset.description, + **metadata, + **additional_fields, + ) + AssetType._instantiated_assets[asset.external_id] = instance + return instance + + @classmethod + def _field_name_asset_resource_class(cls) -> Iterable[tuple[str, TypingType[AssetType]]]: + """AssetType fields are are of type list[AssetType] or Optional[AssetType]""" + # todo? method is identical to _field_name_asset_resource_class in AssetModel + # * unsure how to reuse? + for field_name in cls.model_fields: + class_ = cls.model_fields[field_name].annotation + if pydantic_model_class_candidate(class_): + asset_resource_class = get_args(class_)[0] + if issubclass(asset_resource_class, AssetType): + yield field_name, asset_resource_class + + @classmethod + def _fetch_and_set_metadata( + cls, + client: CogniteClient, + additional_fields: dict[str, Union[list, None]], + asset_external_id: str, + fetch_metadata: bool, + fetch_content: bool, + ) -> dict[str, Any]: + """Fetches resources that are linked with relationships to the asset.""" + relationships = client.relationships.list( + source_external_ids=[asset_external_id], + source_types=["asset"], + target_types=["timeseries", "asset", "sequence", "file"], + limit=-1, + ) + for r in relationships: + field = match_field_from_relationship(cls.model_fields.keys(), r) + target_type = r.target_type.lower() + relationship_target = None + + if target_type == "asset": + if r.target_external_id in AssetType._instantiated_assets: + relationship_target = AssetType._instantiated_assets[r.target_external_id] + + else: + target_class = [y for x, y in cls._field_name_asset_resource_class() if field == x and y][0] + relationship_target = target_class.from_cdf( + client=client, + external_id=r.target_external_id, + fetch_metadata=fetch_metadata, + fetch_content=fetch_content, + ) + + elif target_type == "timeseries": + relationship_target = client.time_series.retrieve(external_id=r.target_external_id) + elif target_type == "sequence": + relationship_target = CDFSequence.from_cdf(client, r.target_external_id, fetch_content) + elif target_type == "file": + relationship_target = CDFFile.from_cdf(client, r.target_external_id, fetch_content) + else: + raise ValueError(f"Cannot handle target type {r.target_type}") + + # Add relationship target to additional fields in-place + if isinstance(additional_fields[field], list): + additional_fields[field].append(relationship_target) + else: + additional_fields[field] = relationship_target + + @classmethod + def from_cdf( + cls, + client: CogniteClient, + external_id: Optional[str] = "", + asset: Optional[Asset] = None, + fetch_metadata: bool = True, + fetch_content: bool = False, + ) -> T_Asset_Type: + """ + Fetch an asset from CDF and convert it to a model instance. + Optionally fetch relationships targets and content by setting + `fetch_metadata` and optionally `fetch_content` + + By default, content of files/sequences/time series is not fetched. + This can be enabled by setting `fetch_content=True`. + """ + + if asset and external_id: + raise ValueError("Only one of asset and external_id can be provided") + if external_id: + # Check if asset has already been instantiated, eg. by a relationship + if external_id in AssetType._instantiated_assets: + return AssetType._instantiated_assets[external_id] + else: + asset = client.assets.retrieve(external_id=external_id) + if not asset: + raise ValueError(f"Could not retrieve asset with {external_id=}") + + # Prepare non-asset metadata fields + additional_fields = { + field: [] if "list" in str(field_info.annotation) else None + for field, field_info in cls.model_fields.items() + if field in [x for x, _ in cls._field_name_asset_resource_class()] + or any(cdf_type in str(field_info.annotation) for cdf_type in [CDFSequence.__name__, TimeSeries.__name__]) + } + + # Populate non-asset metadata fields according to relationships/flags + # `Additional_fields` is modified in-place by `_fetch_metadata` + if fetch_metadata: + cls._fetch_and_set_metadata( + client, + additional_fields, + asset.external_id, + fetch_metadata, + fetch_content, + ) + + return cls._from_asset(asset, additional_fields) + + def _asset_type_prepare_for_diff(self: T_Asset_Type) -> dict[str, dict]: + for model_field in self.model_fields: + field = getattr(self, model_field) + if isinstance(field, AssetType): + # Only include external id in diff + setattr(self, model_field, field.external_id) + + elif isinstance_list(field, AssetType): + # Sort bt external id to have consistent order for diff + setattr(self, model_field, sorted(map(lambda x: x.external_id, field))) + elif isinstance(field, Path): + # remove path from diff + setattr(self, model_field, None) + return self + T_Asset_Type = TypeVar("T_Asset_Type", bound=AssetType) @@ -234,7 +405,7 @@ def _fields_of_type(self, type_: TypingType[_T_Type]) -> Iterable[_T_Type]: value = getattr(self, field_name) if isinstance(value, type_): yield value - elif isinstance(value, list) and value and isinstance(value[0], type_): + elif isinstance_list(value, type_): yield from value @property @@ -293,10 +464,13 @@ def _asset_types(self) -> Iterable[AssetType]: yield from (item for item in self._resource_types() if isinstance(item, AssetType)) @classmethod - def _asset_types_and_field_names(cls) -> Iterable[tuple[str, TypingType[AssetType]]]: + def _field_name_asset_resource_class(cls) -> Iterable[tuple[str, TypingType[AssetType]]]: + """AssetType fields are are of type list[AssetType] or Optional[AssetType]""" + # todo? method is identical to _field_name_asset_resource_class in AssetType + # * unsure how to reuse? for field_name in cls.model_fields: class_ = cls.model_fields[field_name].annotation - if isinstance(class_, GenericAlias): + if pydantic_model_class_candidate(class_): asset_resource_class = get_args(class_)[0] if issubclass(asset_resource_class, AssetType): yield field_name, asset_resource_class @@ -309,20 +483,74 @@ def summary(self) -> dict[str, dict[str, dict[str, int]]]: return summary @classmethod - def from_cdf(cls: TypingType[T_Asset_Model], client: CogniteClient) -> T_Asset_Model: + def from_cdf( + cls: TypingType[T_Asset_Model], + client: CogniteClient, + fetch_metadata: bool = True, + fetch_content: bool = False, + ) -> T_Asset_Model: + if fetch_content and not fetch_metadata: + raise ValueError("Cannot fetch content without also fetching metadata") + output = defaultdict(list) - for field_name, asset_cls in cls._asset_types_and_field_names(): + + for field_name, asset_cls in cls._field_name_asset_resource_class(): assets = client.assets.retrieve_subtree(external_id=asset_cls.parent_external_id) for asset in assets: if asset.external_id == asset_cls.parent_external_id: continue - instance = asset_cls.from_asset(asset) + instance = asset_cls.from_cdf( + client=client, + asset=asset, + fetch_metadata=fetch_metadata, + fetch_content=fetch_content, + ) output[field_name].append(instance) + return cls(**output) - def difference(self: T_Asset_Model, other: T_Asset_Model) -> dict: + def _prepare_for_diff(self: T_Asset_Model) -> dict[str:dict]: raise NotImplementedError() + def difference(self: T_Asset_Model, other: T_Asset_Model, print_string: bool = True) -> dict: + if type(self) != type(other): + raise ValueError("Cannot compare these models of different types.") + + self_dump = self._prepare_for_diff() + other_dump = other._prepare_for_diff() + diff_dict = {} + for model_field in self_dump: + if deep_diff := DeepDiff( + self_dump[model_field], + other_dump[model_field], + ignore_type_in_groups=[(float, int, type(None))], + exclude_regex_paths=[ + r"(.+?)._cognite_client", + r"(.+?).last_updated_time", + r"(.+?).parent_id", + r"(.+?).root_id]", + r"(.+?).data_set_id", + r"(.+?).created_time", + r"(.+?)lastUpdatedTime", + r"(.+?)createdTime", + r"(.+?)parentId", + r"(.+?)\.id", + # Relevant metadata should already be included in the model + r"(.+?)metadata", + ], + ).to_dict(): + diff_dict[model_field] = deep_diff + + if print_string: + _diff_formatter = _DiffFormatter( + full_diff_per_field=diff_dict, + model_a=self_dump, + model_b=other_dump, + ) + print(_diff_formatter.format_as_string()) + + return diff_dict + T_Asset_Model = TypeVar("T_Asset_Model", bound=AssetModel) @@ -359,3 +587,67 @@ def summary(self) -> dict[str, dict[str, dict[str, int]]]: summary[self.model_name]["cdf"]["nodes"] = len(instances.nodes) summary[self.model_name]["cdf"]["edges"] = len(instances.edges) return summary + + +class _DiffFormatter: + def __init__(self, full_diff_per_field: dict[str, dict], model_a: dict, model_b: dict): + self.full_diff_per_field = full_diff_per_field + self.model_a = model_a + self.model_b = model_b + + self.str_builder: list = None + + def _format_per_field(self, field_name: str, field_diff: dict[str, Union[dict, PrettyOrderedSet]]): + self.str_builder.extend( + ( + "\n\n========================== ", + *field_name.title().split("_"), + " ==========================\n", + ) + ) + # Might need a better fallback for names + self.str_builder.append("Indexes and names:\n\t") + names = [ + f'{i}:{d.get("display_name", False) or d.get("name", "")}, ' for i, d in enumerate(self.model_a[field_name]) + ] + + self.str_builder.extend(names) + self.str_builder.append("\n\n") + + for diff_type, diffs in field_diff.items(): + is_iterable = "iterable" in diff_type + + if diff_type in ("type_changes", "values_changed"): + self.str_builder.extend( + ( + f'The following values have changed {"type and value" if "type" in diff_type else ""}:\n', + *format_change_binary(diffs), + "\n", + ), + ) + elif "removed" in diff_type: + self.str_builder.extend( + ( + f"The following {'values' if is_iterable else 'entries'} have been removed:\n", + *format_value_removed(diffs), + "\n", + ) + ) + elif "added" in diff_type: + self.str_builder.extend( + ( + f"The following {'values' if is_iterable else 'entries'} have been added:\n", + *format_value_added(diffs, self.model_b[field_name]), + "\n", + ) + ) + + else: + print(f"cannot handle {diff_type=}") + + def format_as_string(self) -> str: + self.str_builder = [] + for field_name, field_diff in self.full_diff_per_field.items(): + self._format_per_field(field_name, field_diff) + + return "".join(self.str_builder) diff --git a/cognite/powerops/resync/models/cdf_resources.py b/cognite/powerops/resync/models/cdf_resources.py index f56a51084..bae050078 100644 --- a/cognite/powerops/resync/models/cdf_resources.py +++ b/cognite/powerops/resync/models/cdf_resources.py @@ -1,9 +1,10 @@ from __future__ import annotations -from abc import ABC, abstractmethod +from abc import ABC, abstractclassmethod, abstractmethod from typing import Any, ClassVar, Optional import pandas as pd +from cognite.client import CogniteClient from cognite.client.data_classes import FileMetadata, Sequence from pydantic import BaseModel, ConfigDict @@ -30,10 +31,25 @@ def dump(self, camel_case: bool = False) -> dict[str, Any]: dump.pop(read_only_field, None) return dump + @abstractclassmethod + def from_cdf( + cls, + client: CogniteClient, + resource_ext_id: str, + fetch_content: bool = False, + ) -> _CDFResource: + ... + class CDFSequence(_CDFResource): sequence: Sequence - content: pd.DataFrame + content: Optional[pd.DataFrame] + + def __repr__(self) -> str: + return f"CDFSequence(external_id={self.external_id})" + + def __str__(self) -> str: + return self.__repr__() @property def external_id(self): @@ -47,14 +63,53 @@ def _set_data_set_id(self, data_set_id: int): def _dump(self, camel_case: bool = False) -> dict[str, Any]: return self.sequence.dump(camel_case=camel_case) + @classmethod + def from_cdf( + cls, + client: CogniteClient, + resource_ext_id: str, + fetch_content: bool = False, + ) -> CDFSequence: + sequence = client.sequences.retrieve(external_id=resource_ext_id) + if fetch_content: + # limit defaults to 100, might not be an issue + content = client.sequences.data.retrieve_dataframe( + external_id=resource_ext_id, + start=0, + end=None, + ) + else: + content = None + return cls(sequence=sequence, content=content) + class CDFFile(_CDFResource): meta: FileMetadata content: Optional[bytes] = None + def __repr__(self) -> str: + return f"CDFFile(external_id={self.external_id})" + + def __str__(self) -> str: + return self.__repr__() + @property def external_id(self): return self.meta.external_id def _dump(self, camel_case: bool = False) -> dict[str, Any]: return self.meta.dump(camel_case=camel_case) + + @classmethod + def from_cdf( + cls, + client: CogniteClient, + resource_ext_id: str, + fetch_content: bool = False, + ) -> CDFFile: + meta = client.files.retrieve(external_id=resource_ext_id) + if fetch_content: + content = client.files.download_bytes(external_id=resource_ext_id) + else: + content = None + return cls(meta=meta, content=content) diff --git a/cognite/powerops/resync/models/helpers.py b/cognite/powerops/resync/models/helpers.py new file mode 100644 index 000000000..065974026 --- /dev/null +++ b/cognite/powerops/resync/models/helpers.py @@ -0,0 +1,134 @@ +import doctest +import operator + +from functools import reduce +from pprint import pformat +from deepdiff.model import PrettyOrderedSet +from typing import Any, Union, Type, get_args, get_origin +from types import GenericAlias + + +from cognite.client.data_classes import Relationship + + +def isinstance_list(value: Any, type_: Type): + return isinstance(value, list) and value and isinstance(value[0], type_) + + +def match_field_from_relationship(model_fields: list[str], relationship: Relationship) -> str: + """Find the field on the model that matches the relationship using the label""" + if len(relationship.labels) != 1: + raise ValueError(f"Expected one label in {relationship.labels=}") + label = relationship.labels[0].external_id.split(".")[-1] + + candidates = list(filter(lambda k: label in k, model_fields)) + + if len(candidates) != 1: + raise ValueError(f"Could not match {relationship.external_id=} to {model_fields=}") + + return candidates[0] + + +def pydantic_model_class_candidate(class_: type = None) -> bool: + """GenericAlias is a potential list, get origin checks an optional field""" + return isinstance(class_, GenericAlias) or (get_origin(class_) is Union and type(None) in get_args(class_)) + + +def format_change_binary( + deep_diff: dict[str, dict], +) -> list[str]: + """ + Formats a dict of changes with updated values to a list of strings + + >>> deep_diff = { + ... "root[0][plants][name]": { + ... "old_value": "Old name", + ... "new_value": "New name", + ... }, + ... } + >>> format_change_binary(deep_diff) + [' * [0][plants][name]:\\n', "\\t- 'Old name'\\t", ' --> ', "'New name'", '\\n'] + """ + str_builder = [] + for path_, change_dict in deep_diff.items(): + str_builder.extend( + ( + f" * {path_.replace('root', '') }:\n", + f'\t- {pformat(change_dict.get("old_value"))}\t', + " --> ", + f'{pformat(change_dict.get("new_value"))}', + "\n", + ) + ) + return str_builder + + +def format_value_removed(deep_diff: dict[str, dict]) -> list[str]: + """ + Formats a dict of values that were removed to a list of strings + + >>> deep_diff = { + ... "root[0]": { + ... "description": "None", + ... "name": "Name" + ... }, + ... } + >>> format_value_removed(deep_diff) + [' * [0]:\\n', "\\t- {'description': 'None', 'name': 'Name'}\\n"] + + """ + str_builder = [] + for _path, removed in deep_diff.items(): + str_builder.extend( + ( + f" * {_path.replace('root', '')}:\n", + f"\t- {pformat(removed)}\n", + ) + ) + return str_builder + + +def _get_from_deep_diff_path(deep_diff_path: str, lookup_model: dict) -> Any: + """ + Similar to `format_deep_diff_path` and `get_dict_dot_keys` in + `cognite.powerops.clients.shop.data_classes.helpers` but modified + to work with the deepdiff format separated from yaml formats + + >>> _path = "root['key_1'][0]['key_3']" + >>> _lookup_model = { + ... "key_1": [{"key_2": "value_2", "key_3": "value_3"}] + ... } + >>> _get_from_deep_diff_path(_path, _lookup_model) + 'value_3' + + """ + keys = [ + int(k) if k.isdigit() else k + for k in deep_diff_path.replace("root[", "").replace("'", "").removesuffix("]").split("][") + ] + try: + item = reduce(operator.getitem, keys, lookup_model) + except KeyError: + item = f"Could not retrieve at {deep_diff_path}" + return item + + +def format_value_added(deep_diff: PrettyOrderedSet, lookup_model: dict) -> list[str]: + """ + Formats a dict of values that were added to a list of strings + The deep_diff does not contain the new value, so it is fetched from the lookup_model + """ + str_builder = [] + _path: str = None # type: ignore + for _path in deep_diff: + str_builder.extend( + ( + f" * {_path.replace('root', '')}:\n", + f"\t- {pformat(_get_from_deep_diff_path(_path, lookup_model))}\n", + ) + ) + return str_builder + + +if __name__ == "__main__": + doctest.testmod() diff --git a/cognite/powerops/resync/models/market/dayahead.py b/cognite/powerops/resync/models/market/dayahead.py index cdaa47d9e..820d223d1 100644 --- a/cognite/powerops/resync/models/market/dayahead.py +++ b/cognite/powerops/resync/models/market/dayahead.py @@ -30,6 +30,17 @@ class DayAheadProcess(Process): bid_matrix_generator_config: Optional[CDFSequence] = None incremental_mapping: list[CDFSequence] = Field(default_factory=list) + # @classmethod + # # i teorien, løftes til AssetType... kanskje + # def from_cdf( + # cls, + # client, + # external_id: str, + # fetch_relationships: bool = False, + # fetch_content: bool = False, + # ) -> DayAheadProcess: + # raise NotImplementedError() + class NordPoolMarket(Market): max_price: float diff --git a/cognite/powerops/resync/models/production.py b/cognite/powerops/resync/models/production.py index 202220384..6437b589a 100644 --- a/cognite/powerops/resync/models/production.py +++ b/cognite/powerops/resync/models/production.py @@ -2,7 +2,7 @@ import json from pathlib import Path -from typing import ClassVar, Optional, Union +from typing import Any, ClassVar, Optional, Union from cognite.client.data_classes import Asset, TimeSeries from pydantic import ConfigDict, Field @@ -10,6 +10,7 @@ from cognite.powerops.cdf_labels import AssetLabel from cognite.powerops.resync.models.base import AssetModel, AssetType, NonAssetType from cognite.powerops.resync.models.cdf_resources import CDFSequence +from cognite.powerops.resync.models.helpers import isinstance_list class Generator(AssetType): @@ -23,18 +24,12 @@ class Generator(AssetType): turbine_efficiency_curve: Optional[CDFSequence] = None @classmethod - def from_asset(cls, asset: Asset) -> Generator: - return cls( - _external_id=asset.external_id, - name=asset.name, - description=asset.description, - p_min=float(asset.metadata.get("p_min", 0.0)), - penstock=asset.metadata.get("penstock", ""), - startcost=float(asset.metadata.get("startcost", 0.0)), - start_stop_cost_time_series=None, - generator_efficiency_curve=None, - turbine_efficiency_curve=None, - ) + def _parse_asset_metadata(cls, asset_metadata: dict[str, str]) -> dict[str, Any]: + return { + "p_min": float(asset_metadata.get("p_min", 0.0)), + "penstock": asset_metadata.get("penstock", ""), + "startcost": float(asset_metadata.get("startcost", 0.0)), + } class Reservoir(AssetType): @@ -44,14 +39,11 @@ class Reservoir(AssetType): ordering: str @classmethod - def from_asset(cls, asset: Asset) -> Reservoir: - return cls( - _external_id=asset.external_id, - name=asset.name, - description=asset.description, - display_name=asset.metadata.get("display_name", ""), - ordering=asset.metadata.get("ordering", ""), - ) + def _parse_asset_metadata(cls, asset_metadata: dict[str, str]) -> dict[str, Any]: + return { + "display_name": asset_metadata.get("display_name", ""), + "ordering": asset_metadata.get("ordering", ""), + } class Plant(AssetType): @@ -75,35 +67,24 @@ class Plant(AssetType): head_direct_time_series: Optional[TimeSeries] = None @classmethod - def from_asset(cls, asset: Asset) -> Plant: - penstock_head_loss_factors_raw: str = asset.metadata.get("penstock_head_loss_factors", "") + def _parse_asset_metadata(cls, asset_metadata: dict[str, str]) -> dict[str, Any]: + penstock_head_loss_factors_raw: str = asset_metadata.get("penstock_head_loss_factors", "") try: penstock_head_loss_factors = json.loads(penstock_head_loss_factors_raw) if not isinstance(penstock_head_loss_factors, dict): raise TypeError except (json.JSONDecodeError, TypeError): penstock_head_loss_factors = {} - return cls( - _external_id=asset.external_id, - name=asset.name, - description=asset.description, - display_name=asset.metadata.get("display_name", ""), - ordering=asset.metadata.get("ordering", ""), - head_loss_factor=float(asset.metadata.get("head_loss_factor", 0.0)), - outlet_level=float(asset.metadata.get("outlet_level", 0.0)), - p_min=float(asset.metadata.get("p_min", 0.0)), - p_max=float(asset.metadata.get("p_max", 0.0)), - penstock_head_loss_factors=penstock_head_loss_factors, - generators=[], - inlet_reservoir=None, - p_min_time_series=None, - p_max_time_series=None, - water_value_time_series=None, - feeding_fee_time_series=None, - outlet_level_time_series=None, - inlet_level_time_series=None, - head_direct_time_series=None, - ) + + return { + "display_name": asset_metadata.get("display_name", ""), + "ordering": asset_metadata.get("ordering", ""), + "head_loss_factor": float(asset_metadata.get("head_loss_factor", 0.0)), + "outlet_level": float(asset_metadata.get("outlet_level", 0.0)), + "p_min": float(asset_metadata.get("p_min", 0.0)), + "p_max": float(asset_metadata.get("p_max", 0.0)), + "penstock_head_loss_factors": penstock_head_loss_factors, + } class WaterCourseShop(NonAssetType): @@ -122,18 +103,13 @@ class Watercourse(AssetType): production_obligation_time_series: list[TimeSeries] = Field(default_factory=list) @classmethod - def from_asset(cls, asset: Asset) -> Watercourse: - return cls( - _external_id=asset.external_id, - name=asset.name, - description=asset.description, - shop=WaterCourseShop(penalty_limit=asset.metadata.get("penalty_limit", "")), - config_version=None, - model_file=None, - processed_model_file=None, - plants=[], - production_obligation_time_series=[], - ) + def _parse_asset_metadata(cls, asset_metadata: dict[str, str]) -> dict[str, Any]: + return { + "config_version": "", + "shop": WaterCourseShop(penalty_limit=asset_metadata.get("shop:penalty_limit", "")), + "model_file": None, + "processed_model_file": None, + } class PriceArea(AssetType): @@ -144,21 +120,31 @@ class PriceArea(AssetType): watercourses: list[Watercourse] = Field(default_factory=list) @classmethod - def from_asset(cls, asset: Asset) -> PriceArea: - return cls( - _external_id=asset.external_id, - name=asset.name, - description=asset.description, - dayahead_price_time_series=None, - plants=[], - watercourses=[], - ) + def _parse_asset_metadata(cls, asset_metadata: dict[str, str]) -> dict[str, Any]: + # Maintain the AssetType structure + return {} class ProductionModel(AssetModel): root_asset: ClassVar[Asset] = Asset(external_id="power_ops", name="PowerOps") + plants: list[Plant] = Field(default_factory=list) + generators: list[Generator] = Field(default_factory=list) reservoirs: list[Reservoir] = Field(default_factory=list) watercourses: list[Watercourse] = Field(default_factory=list) price_areas: list[PriceArea] = Field(default_factory=list) - plants: list[Plant] = Field(default_factory=list) - generators: list[Generator] = Field(default_factory=list) + + def _prepare_for_diff(self: ProductionModel) -> dict: + clone = self.model_copy(deep=True) + + for model_field in clone.model_fields: + field_value = getattr(clone, model_field) + if isinstance_list(field_value, AssetType): + # Sort the asset types to have comparable order for diff + _sorted = sorted(field_value, key=lambda x: x.external_id) + # Prepare each asset type for diff + _prepared = map(lambda x: x._asset_type_prepare_for_diff(), _sorted) + setattr(clone, model_field, list(_prepared)) + elif isinstance(field_value, AssetType): + field_value._asset_type_prepare_for_diff() + # Some fields are have been set to their external_id which gives a warning we can ignore + return clone.model_dump(warnings=False) diff --git a/pyproject.toml b/pyproject.toml index a1fb5f2d3..130d68a14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cognite-power-ops" -version = "0.33.1" +version = "0.34.0" description = "SDK for power markets operations on Cognite Data Fusion" readme = "README.md" authors = ["Cognite "] diff --git a/scripts/diff_production_model.py b/scripts/diff_production_model.py index ed81d3bb1..ee9b93537 100644 --- a/scripts/diff_production_model.py +++ b/scripts/diff_production_model.py @@ -1,19 +1,25 @@ from cognite.powerops.clients.powerops_client import get_powerops_client from cognite.powerops.resync.config.resync_config import ProductionConfig -from cognite.powerops.resync.models.production import ProductionModel from cognite.powerops.resync.to_models.to_production_model import to_production_model +from cognite.powerops.resync.models.production import ProductionModel from tests.constants import REPO_ROOT + DEMO_DATA = REPO_ROOT / "tests" / "test_unit" / "test_bootstrap" / "data" / "demo" def main(): client = get_powerops_client().cdf + cdf_model = ProductionModel.from_cdf( + client, + fetch_metadata=True, + fetch_content=True, + ) + config = ProductionConfig.load_yamls(DEMO_DATA / "production", instantiate=True) local_model = to_production_model(config) - cdf_model = ProductionModel.from_cdf(client) - print(local_model.difference(cdf_model)) + _ = local_model.difference(cdf_model) if __name__ == "__main__":