From 9714270f1b8bec58690f5c49ab1ac6290c14ff64 Mon Sep 17 00:00:00 2001 From: Anders Albert <60234212+doctrino@users.noreply.github.com> Date: Mon, 18 Nov 2024 07:53:43 +0100 Subject: [PATCH] Read yaml (#753) * feat; yaml read api * build; changelog * refactor: lookup system containers * style: language * style: wording * style: . * refactor; skip validation of core model * refactor; improved debugging * Apply suggestions from code review * refactor: revert all exception for cognite spaces --- cognite/neat/_rules/importers/_dms2rules.py | 10 +++- cognite/neat/_rules/models/_base_rules.py | 4 ++ cognite/neat/_rules/models/dms/_schema.py | 7 +++ cognite/neat/_rules/models/dms/_validation.py | 2 +- cognite/neat/_session/_read.py | 55 ++++++++++++++++++- cognite/neat/_session/_to.py | 29 +++++++--- docs/CHANGELOG.md | 5 ++ 7 files changed, 101 insertions(+), 11 deletions(-) diff --git a/cognite/neat/_rules/importers/_dms2rules.py b/cognite/neat/_rules/importers/_dms2rules.py index 122030b9e..79a7f3c05 100644 --- a/cognite/neat/_rules/importers/_dms2rules.py +++ b/cognite/neat/_rules/importers/_dms2rules.py @@ -1,5 +1,5 @@ from collections import Counter -from collections.abc import Collection, Sequence +from collections.abc import Collection, Iterable, Sequence from datetime import datetime, timezone from pathlib import Path from typing import Literal, cast @@ -86,6 +86,14 @@ def __init__( self._all_containers_by_id.update(schema.reference.containers.items()) self._all_views_by_id.update(schema.reference.views.items()) + def update_referenced_containers(self, containers: Iterable[dm.ContainerApply]) -> None: + """Update the referenced containers. This is useful to add Cognite containers identified after the root schema + is read""" + for container in containers: + if container.as_id() in self._all_containers_by_id: + continue + self._all_containers_by_id[container.as_id()] = container + @classmethod def from_data_model_id( cls, diff --git a/cognite/neat/_rules/models/_base_rules.py b/cognite/neat/_rules/models/_base_rules.py index 1bcb7cfec..4727e73cb 100644 --- a/cognite/neat/_rules/models/_base_rules.py +++ b/cognite/neat/_rules/models/_base_rules.py @@ -293,6 +293,10 @@ class SheetRow(SchemaModel): def _identifier(self) -> tuple[Hashable, ...]: raise NotImplementedError() + def __repr__(self) -> str: + # Simplified representation of the object for debugging + return f"{self.__class__.__name__}({self._identifier()})" + T_SheetRow = TypeVar("T_SheetRow", bound=SheetRow) diff --git a/cognite/neat/_rules/models/dms/_schema.py b/cognite/neat/_rules/models/dms/_schema.py index 0aff93372..efae07916 100644 --- a/cognite/neat/_rules/models/dms/_schema.py +++ b/cognite/neat/_rules/models/dms/_schema.py @@ -708,6 +708,13 @@ def referenced_spaces(self, include_indirect_references: bool = True) -> set[str referenced_spaces |= {s.space for s in self.spaces.values()} return referenced_spaces + def referenced_container(self) -> set[dm.ContainerId]: + referenced_containers = { + container for view in self.views.values() for container in view.referenced_containers() + } + referenced_containers |= set(self.containers.keys()) + return referenced_containers + def as_read_model(self) -> dm.DataModel[dm.View]: if self.data_model is None: raise ValueError("Data model is not defined") diff --git a/cognite/neat/_rules/models/dms/_validation.py b/cognite/neat/_rules/models/dms/_validation.py index 40380df91..56ad2b7af 100644 --- a/cognite/neat/_rules/models/dms/_validation.py +++ b/cognite/neat/_rules/models/dms/_validation.py @@ -192,7 +192,7 @@ def _referenced_views_and_containers_are_existing_and_proper_size(self) -> None: } for prop_no, prop in enumerate(self.properties): - if prop.container and (container_id := prop.container.as_id()) not in defined_containers: + if prop.container and ((container_id := prop.container.as_id()) not in defined_containers): errors.append( ResourceNotDefinedError( identifier=container_id, diff --git a/cognite/neat/_session/_read.py b/cognite/neat/_session/_read.py index 039866f48..3fb6145ef 100644 --- a/cognite/neat/_session/_read.py +++ b/cognite/neat/_session/_read.py @@ -1,17 +1,19 @@ import tempfile from datetime import datetime, timezone from pathlib import Path -from typing import Any +from typing import Any, Literal from cognite.client import CogniteClient from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier +from cognite.neat._constants import COGNITE_SPACES from cognite.neat._graph import examples as instances_examples from cognite.neat._graph import extractors from cognite.neat._issues import IssueList from cognite.neat._issues.errors import NeatValueError from cognite.neat._rules import importers from cognite.neat._rules._shared import ReadRules +from cognite.neat._rules.importers import BaseImporter from cognite.neat._store._provenance import Activity as ProvenanceActivity from cognite.neat._store._provenance import Change from cognite.neat._store._provenance import Entity as ProvenanceEntity @@ -32,6 +34,7 @@ def __init__(self, state: SessionState, client: CogniteClient | None, verbose: b self.rdf = RDFReadAPI(state, client, verbose) self.excel = ExcelReadAPI(state, client, verbose) self.csv = CSVReadAPI(state, client, verbose) + self.yaml = YamlReadAPI(state, client, verbose) @intercept_session_exceptions @@ -146,6 +149,56 @@ def __call__(self, io: Any) -> IssueList: return input_rules.issues +@intercept_session_exceptions +class YamlReadAPI(BaseReadAPI): + def __call__(self, io: Any, format: Literal["neat", "toolkit"] = "neat") -> IssueList: + reader = NeatReader.create(io) + if not isinstance(reader, PathReader): + raise NeatValueError("Only file paths are supported for YAML files") + start = datetime.now(timezone.utc) + importer: BaseImporter + if format == "neat": + importer = importers.YAMLImporter.from_file(reader.path) + elif format == "toolkit": + if reader.path.is_file(): + dms_importer = importers.DMSImporter.from_zip_file(reader.path) + elif reader.path.is_dir(): + dms_importer = importers.DMSImporter.from_directory(reader.path) + else: + raise NeatValueError(f"Unsupported YAML format: {format}") + ref_containers = dms_importer.root_schema.referenced_container() + if system_container_ids := [ + container_id for container_id in ref_containers if container_id.space in COGNITE_SPACES + ]: + if self._client is None: + raise NeatSessionError( + "No client provided. You are referencing Cognite containers in your data model, " + "NEAT needs a client to lookup the container definitions. " + "Please set the client in the session, NeatSession(client=client)." + ) + system_containers = self._client.data_modeling.containers.retrieve(system_container_ids) + dms_importer.update_referenced_containers(system_containers) + + importer = dms_importer + else: + raise NeatValueError(f"Unsupported YAML format: {format}") + input_rules: ReadRules = importer.to_rules() + + end = datetime.now(timezone.utc) + + if input_rules.rules: + change = Change.from_rules_activity( + input_rules, + importer.agent, + start, + end, + description=f"YAML file {reader!s} read as unverified data model", + ) + self._store_rules(input_rules, change) + + return input_rules.issues + + @intercept_session_exceptions class CSVReadAPI(BaseReadAPI): def __call__(self, io: Any, type: str, primary_key: str) -> None: diff --git a/cognite/neat/_session/_to.py b/cognite/neat/_session/_to.py index 1162d5d5f..6207526e9 100644 --- a/cognite/neat/_session/_to.py +++ b/cognite/neat/_session/_to.py @@ -46,17 +46,30 @@ def excel( return None @overload - def yaml(self, io: None) -> str: ... + def yaml(self, io: None, format: Literal["neat"] = "neat") -> str: ... @overload - def yaml(self, io: Any) -> None: ... - - def yaml(self, io: Any | None = None) -> str | None: - exporter = exporters.YAMLExporter() - if io is None: - return exporter.export(self._state.data_model.last_verified_rule[1]) + def yaml(self, io: Any, format: Literal["neat", "toolkit"] = "neat") -> None: ... + + def yaml(self, io: Any | None = None, format: Literal["neat", "toolkit"] = "neat") -> str | None: + if format == "neat": + exporter = exporters.YAMLExporter() + last_verified = self._state.data_model.last_verified_rule[1] + if io is None: + return exporter.export(last_verified) + + exporter.export_to_file(last_verified, Path(io)) + elif format == "toolkit": + if io is None or not isinstance(io, str | Path): + raise NeatSessionError( + "Please provide a zip file or directory path to write the YAML files to." + "This is required for the 'toolkit' format." + ) + dms_rule = self._state.data_model.last_verified_dms_rules[1] + exporters.DMSExporter().export_to_file(dms_rule, Path(io)) + else: + raise NeatSessionError("Please provide a valid format. {['neat', 'toolkit']}") - exporter.export_to_file(self._state.data_model.last_verified_rule[1], Path(io)) return None diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index db0ffe1f4..8f25bb3c4 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,11 @@ Changes are grouped as follows: - `Fixed` for any bug fixes. - `Security` in case of vulnerabilities. +## TBD +### Added +- Support for reading `YAML` +- Support for writing `YAML` in toolkit format. + ## [0.97.3] - 16-11-**2024** ### Improved - `...make_connection_on_exact_match` now takes strings instead of URIRefs