Skip to content

Commit

Permalink
Read yaml (#753)
Browse files Browse the repository at this point in the history
* feat; yaml read api

* build; changelog

* refactor: lookup system containers

* style: language

* style: wording

* style: .

* refactor; skip validation of core model

* refactor; improved debugging

* Apply suggestions from code review

* refactor: revert all exception for cognite spaces
  • Loading branch information
doctrino authored Nov 18, 2024
1 parent 593811a commit 9714270
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 11 deletions.
10 changes: 9 additions & 1 deletion cognite/neat/_rules/importers/_dms2rules.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import Counter
from collections.abc import Collection, Sequence
from collections.abc import Collection, Iterable, Sequence
from datetime import datetime, timezone
from pathlib import Path
from typing import Literal, cast
Expand Down Expand Up @@ -86,6 +86,14 @@ def __init__(
self._all_containers_by_id.update(schema.reference.containers.items())
self._all_views_by_id.update(schema.reference.views.items())

def update_referenced_containers(self, containers: Iterable[dm.ContainerApply]) -> None:
    """Add containers to the container lookup without overwriting known entries.

    This is useful to add Cognite containers identified after the root schema
    is read.

    Args:
        containers: Containers to register. A container whose ``as_id()`` is
            already a key of ``self._all_containers_by_id`` is skipped.
    """
    known_containers = self._all_containers_by_id
    for candidate in containers:
        candidate_id = candidate.as_id()
        if candidate_id not in known_containers:
            known_containers[candidate_id] = candidate

@classmethod
def from_data_model_id(
cls,
Expand Down
4 changes: 4 additions & 0 deletions cognite/neat/_rules/models/_base_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ class SheetRow(SchemaModel):
def _identifier(self) -> tuple[Hashable, ...]:
    """Return the tuple of hashable values that identifies this row.

    This implementation always raises ``NotImplementedError``; presumably
    subclasses are expected to override it (confirm against the subclasses).

    Raises:
        NotImplementedError: Always, in this implementation.
    """
    raise NotImplementedError()

def __repr__(self) -> str:
    """Return a short ``ClassName(identifier)`` string intended for debugging."""
    class_name = self.__class__.__name__
    identifier = self._identifier()
    return f"{class_name}({identifier})"


T_SheetRow = TypeVar("T_SheetRow", bound=SheetRow)

Expand Down
7 changes: 7 additions & 0 deletions cognite/neat/_rules/models/dms/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,13 @@ def referenced_spaces(self, include_indirect_references: bool = True) -> set[str
referenced_spaces |= {s.space for s in self.spaces.values()}
return referenced_spaces

def referenced_container(self) -> set[dm.ContainerId]:
    """Return the ids of all containers referenced by this schema.

    The result is the union of the containers reported by each view's
    ``referenced_containers()`` and the keys of ``self.containers``.
    """
    container_ids: set[dm.ContainerId] = set()
    # Containers referenced from the views first ...
    for view in self.views.values():
        container_ids.update(view.referenced_containers())
    # ... then the containers defined in the schema itself.
    container_ids.update(self.containers.keys())
    return container_ids

def as_read_model(self) -> dm.DataModel[dm.View]:
if self.data_model is None:
raise ValueError("Data model is not defined")
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/_rules/models/dms/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def _referenced_views_and_containers_are_existing_and_proper_size(self) -> None:
}

for prop_no, prop in enumerate(self.properties):
if prop.container and (container_id := prop.container.as_id()) not in defined_containers:
if prop.container and ((container_id := prop.container.as_id()) not in defined_containers):
errors.append(
ResourceNotDefinedError(
identifier=container_id,
Expand Down
55 changes: 54 additions & 1 deletion cognite/neat/_session/_read.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from typing import Any, Literal

from cognite.client import CogniteClient
from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier

from cognite.neat._constants import COGNITE_SPACES
from cognite.neat._graph import examples as instances_examples
from cognite.neat._graph import extractors
from cognite.neat._issues import IssueList
from cognite.neat._issues.errors import NeatValueError
from cognite.neat._rules import importers
from cognite.neat._rules._shared import ReadRules
from cognite.neat._rules.importers import BaseImporter
from cognite.neat._store._provenance import Activity as ProvenanceActivity
from cognite.neat._store._provenance import Change
from cognite.neat._store._provenance import Entity as ProvenanceEntity
Expand All @@ -32,6 +34,7 @@ def __init__(self, state: SessionState, client: CogniteClient | None, verbose: b
self.rdf = RDFReadAPI(state, client, verbose)
self.excel = ExcelReadAPI(state, client, verbose)
self.csv = CSVReadAPI(state, client, verbose)
self.yaml = YamlReadAPI(state, client, verbose)


@intercept_session_exceptions
Expand Down Expand Up @@ -146,6 +149,56 @@ def __call__(self, io: Any) -> IssueList:
return input_rules.issues


@intercept_session_exceptions
class YamlReadAPI(BaseReadAPI):
    """Read an unverified data model from YAML into the session."""

    def __call__(self, io: Any, format: Literal["neat", "toolkit"] = "neat") -> IssueList:
        """Import rules from a YAML source and store them in the session.

        Args:
            io: The source to read from. It must resolve to a file path
                (``NeatReader.create`` must return a ``PathReader``).
            format: ``"neat"`` reads a single YAML file with ``YAMLImporter``;
                ``"toolkit"`` reads a zip file or a directory with ``DMSImporter``.

        Returns:
            The issues produced by the importer.

        Raises:
            NeatValueError: If ``io`` is not a file path, if the toolkit path is
                neither a file nor a directory, or if ``format`` is not supported.
            NeatSessionError: If the toolkit data model references containers in
                the Cognite spaces and the session has no client to look them up.
        """
        reader = NeatReader.create(io)
        if not isinstance(reader, PathReader):
            raise NeatValueError("Only file paths are supported for YAML files")
        start = datetime.now(timezone.utc)
        importer: BaseImporter
        if format == "neat":
            importer = importers.YAMLImporter.from_file(reader.path)
        elif format == "toolkit":
            if reader.path.is_file():
                dms_importer = importers.DMSImporter.from_zip_file(reader.path)
            elif reader.path.is_dir():
                dms_importer = importers.DMSImporter.from_directory(reader.path)
            else:
                # The format is valid here; it is the path that cannot be read,
                # so report the path rather than the format.
                raise NeatValueError(
                    f"The path {reader.path} is neither a file nor a directory. "
                    "The 'toolkit' format requires a zip file or a directory."
                )
            ref_containers = dms_importer.root_schema.referenced_container()
            # Containers in the Cognite spaces are not part of the files that were
            # read, so their definitions are looked up through the client.
            if system_container_ids := [
                container_id for container_id in ref_containers if container_id.space in COGNITE_SPACES
            ]:
                if self._client is None:
                    raise NeatSessionError(
                        "No client provided. You are referencing Cognite containers in your data model, "
                        "NEAT needs a client to lookup the container definitions. "
                        "Please set the client in the session, NeatSession(client=client)."
                    )
                # NOTE(review): update_referenced_containers is annotated for
                # ContainerApply objects - confirm the retrieved containers are compatible.
                system_containers = self._client.data_modeling.containers.retrieve(system_container_ids)
                dms_importer.update_referenced_containers(system_containers)

            importer = dms_importer
        else:
            raise NeatValueError(f"Unsupported YAML format: {format}")
        input_rules: ReadRules = importer.to_rules()

        end = datetime.now(timezone.utc)

        # Only record provenance and store the rules when the import produced rules.
        if input_rules.rules:
            change = Change.from_rules_activity(
                input_rules,
                importer.agent,
                start,
                end,
                description=f"YAML file {reader!s} read as unverified data model",
            )
            self._store_rules(input_rules, change)

        return input_rules.issues


@intercept_session_exceptions
class CSVReadAPI(BaseReadAPI):
def __call__(self, io: Any, type: str, primary_key: str) -> None:
Expand Down
29 changes: 21 additions & 8 deletions cognite/neat/_session/_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,30 @@ def excel(
return None

@overload
def yaml(self, io: None) -> str: ...
def yaml(self, io: None, format: Literal["neat"] = "neat") -> str: ...

@overload
def yaml(self, io: Any) -> None: ...

def yaml(self, io: Any | None = None) -> str | None:
exporter = exporters.YAMLExporter()
if io is None:
return exporter.export(self._state.data_model.last_verified_rule[1])
def yaml(self, io: Any, format: Literal["neat", "toolkit"] = "neat") -> None: ...

def yaml(self, io: Any | None = None, format: Literal["neat", "toolkit"] = "neat") -> str | None:
    """Export the last verified rules as YAML.

    Args:
        io: Where to write the output. For the ``"neat"`` format, ``None``
            returns the YAML as a string instead of writing a file. For the
            ``"toolkit"`` format a ``str`` or ``Path`` is required.
        format: ``"neat"`` exports with ``YAMLExporter``; ``"toolkit"`` exports
            the last verified DMS rules with ``DMSExporter``.

    Returns:
        The YAML string when ``format`` is ``"neat"`` and ``io`` is ``None``,
        otherwise ``None``.

    Raises:
        NeatSessionError: If the ``"toolkit"`` format is requested without a
            ``str``/``Path`` target, or if ``format`` is not supported.
    """
    if format == "neat":
        exporter = exporters.YAMLExporter()
        last_verified = self._state.data_model.last_verified_rule[1]
        if io is None:
            return exporter.export(last_verified)

        exporter.export_to_file(last_verified, Path(io))
    elif format == "toolkit":
        # None is not a str/Path either, so a single isinstance check covers it.
        if not isinstance(io, str | Path):
            raise NeatSessionError(
                "Please provide a zip file or directory path to write the YAML files to. "
                "This is required for the 'toolkit' format."
            )
        dms_rule = self._state.data_model.last_verified_dms_rules[1]
        exporters.DMSExporter().export_to_file(dms_rule, Path(io))
    else:
        raise NeatSessionError(
            f"Unsupported format {format!r}. Please provide a valid format: 'neat' or 'toolkit'."
        )

    return None


Expand Down
5 changes: 5 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ Changes are grouped as follows:
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## TBD
### Added
- Support for reading `YAML` in neat and toolkit format.
- Support for writing `YAML` in toolkit format.

## [0.97.3] - 16-11-**2024**
### Improved
- `...make_connection_on_exact_match` now takes strings instead of URIRefs
Expand Down

0 comments on commit 9714270

Please sign in to comment.