Skip to content

Commit

Permalink
Neat 486 add neat.rdf.examples.nordic44 (#671)
Browse files Browse the repository at this point in the history
* improve stats

* docs

* a bit better title
  • Loading branch information
nikokaoja authored Oct 25, 2024
1 parent 017d294 commit 6923f58
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 62 deletions.
8 changes: 4 additions & 4 deletions cognite/neat/_session/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def infer(
cast(InformationInputRules, input_rules.rules).metadata.name = external_id
cast(InformationInputRules, input_rules.rules).metadata.version = version

self.read._store_rules(self._state.store, input_rules, "Data Model Inference")
self.read.rdf._store_rules(self._state.store, input_rules, "Data Model Inference")
return input_rules.issues

def _repr_html_(self) -> str:
Expand All @@ -72,13 +72,13 @@ def _repr_html_(self) -> str:
if state.input_rules and not state.verified_rules:
metadata = cast(InputComponent, state.input_rule.rules.metadata) # type: ignore[union-attr]
table = pd.DataFrame([metadata.dump()]).T._repr_html_() # type: ignore[operator]
output.append(f"<strong>Raw DataModel</strong><br />{table}")
output.append(f"<H2>Raw Data Model</H2><br />{table}")

if state.verified_rules:
table = pd.DataFrame([state.last_verified_rule.metadata.model_dump()]).T._repr_html_() # type: ignore[operator]
output.append(f"<strong>DataModel</strong><br />{table}")
output.append(f"<H2>Data Model</H2><br />{table}")

if state.has_store:
output.append(f"<strong>Metadata</strong> {state.store._repr_html_()}")
output.append(f"<H2>Instances</H2> {state.store._repr_html_()}")

return "<br />".join(output)
107 changes: 61 additions & 46 deletions cognite/neat/_session/_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

from cognite.client import CogniteClient

from cognite.neat._graph import examples as instances_examples
from cognite.neat._graph import extractors
from cognite.neat._issues import IssueList
from cognite.neat._rules import importers
from cognite.neat._rules._shared import ReadRules
from cognite.neat._store import NeatGraphStore

from ._state import SessionState
from ._wizard import NeatObjectType, RDFFileType, object_wizard, rdf_dm_wizard
Expand All @@ -17,15 +17,64 @@ class ReadAPI:
def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
self._state = state
self._verbose = verbose
self.cdf = CDFReadAPI(state, client)
self.cdf = CDFReadAPI(state, client, verbose)
self.rdf = RDFReadAPI(state, client, verbose)
self.excel = ExcelReadAPI(state, client, verbose)

def excel(self, io: Any) -> IssueList:

class BaseReadAPI:
    """Shared plumbing for the read APIs: session state, CDF client and verbosity flag."""

    def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
        self._state = state
        self._verbose = verbose
        self._client = client

    def _store_rules(self, io: Any, input_rules: ReadRules, source: str) -> None:
        """Record read rules on the session state and, when verbose, report the outcome."""
        if input_rules.rules:
            self._state.input_rules.append(input_rules)
        if self._verbose:
            outcome = "read failed" if input_rules.issues.has_errors else "read successfully"
            print(f"{source} {type(io)} {io} {outcome}")

    def _return_filepath(self, io: Any) -> Path:
        """Coerce *io* to a Path; anything other than str or Path is rejected."""
        if isinstance(io, Path):
            return io
        if isinstance(io, str):
            return Path(io)
        raise ValueError(f"Expected str or Path, got {type(io)}")


class CDFReadAPI(BaseReadAPI): ...


class ExcelReadAPI(BaseReadAPI):
    """Reads a data model from an Excel workbook into the session."""

    def __call__(self, io: Any) -> IssueList:
        """Import rules from the Excel file at *io*; returns any issues encountered."""
        path = self._return_filepath(io)
        rules: ReadRules = importers.ExcelImporter(path).to_rules()
        self._store_rules(io, rules, "Excel")
        return rules.issues

def rdf(

class RDFReadAPI(BaseReadAPI):
def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
    """Set up RDF reading plus access to the bundled example graphs."""
    self.examples = RDFExamples(state)
    super().__init__(state, client, verbose)

def _ontology(self, io: Any) -> IssueList:
    """Read an OWL ontology file and convert it to rules."""
    source_path = self._return_filepath(io)
    rules: ReadRules = importers.OWLImporter.from_file(source_path).to_rules()
    self._store_rules(io, rules, "Ontology")
    return rules.issues

def _imf(self, io: Any) -> IssueList:
    """Read IMF types from a file and convert them to rules."""
    source_path = self._return_filepath(io)
    rules: ReadRules = importers.IMFImporter.from_file(source_path).to_rules()
    self._store_rules(io, rules, "IMF Types")
    return rules.issues

def __call__(
self,
io: Any,
type: NeatObjectType | None = None,
Expand All @@ -42,53 +91,19 @@ def rdf(
return self._imf(io)
else:
raise ValueError(f"Expected ontology, imf or instances, got {source}")

elif type.lower() == "Instances".lower():
self._state.store.write(extractors.RdfFileExtractor(self._return_filepath(io)))
return IssueList()
else:
raise ValueError(f"Expected data model or instances, got {type}")

def _ontology(self, io: Any) -> IssueList:
    """Read an OWL ontology file and convert it to rules."""
    filepath = self._return_filepath(io)
    input_rules: ReadRules = importers.OWLImporter.from_file(filepath).to_rules()
    self._store_rules(io, input_rules, "Ontology")
    return input_rules.issues

def _imf(self, io: Any) -> IssueList:
    """Read IMF types from a file and convert them to rules."""
    filepath = self._return_filepath(io)
    input_rules: ReadRules = importers.IMFImporter.from_file(filepath).to_rules()
    self._store_rules(io, input_rules, "IMF Types")
    return input_rules.issues

def _inference(self, io: Any) -> IssueList:
    """Infer rules either from an in-memory graph store or from a file on disk."""
    if isinstance(io, NeatGraphStore):
        # Already-loaded graph: infer directly from the store.
        importer = importers.InferenceImporter.from_graph_store(io)
    else:
        # Anything else is treated as path-like; _return_filepath validates it.
        importer = importers.InferenceImporter.from_file(self._return_filepath(io))

    input_rules: ReadRules = importer.to_rules()
    self._store_rules(io, input_rules, "Inference")
    return input_rules.issues

def _return_filepath(self, io: Any) -> Path:
    """Normalize *io* to a Path; only str and Path inputs are accepted."""
    if isinstance(io, str):
        return Path(io)
    elif isinstance(io, Path):
        return io
    else:
        raise ValueError(f"Expected str or Path, got {type(io)}")

def _store_rules(self, io: Any, input_rules: ReadRules, source: str) -> None:
    """Append successfully read rules to session state; when verbose, print the outcome."""
    if input_rules.rules:
        self._state.input_rules.append(input_rules)
    if self._verbose:
        if input_rules.issues.has_errors:
            print(f"{source} {type(io)} {io} read failed")
        else:
            print(f"{source} {type(io)} {io} read successfully")


class CDFReadAPI:
def __init__(self, state: SessionState, client: CogniteClient | None) -> None:
class RDFExamples:
    """Accessors for the example RDF datasets bundled with neat."""

    def __init__(self, state: SessionState) -> None:
        # Fix: a stray `self._client = client` (residue of the removed CDFReadAPI
        # __init__) referenced an undefined name and would raise NameError here.
        self._state = state

    @property
    def nordic44(self) -> IssueList:
        """Load the Nordic 44 example knowledge graph into the session store."""
        self._state.store.write(extractors.RdfFileExtractor(instances_examples.nordic44_knowledge_graph))
        return IssueList()
47 changes: 35 additions & 12 deletions cognite/neat/_store/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from collections.abc import Iterable
from datetime import datetime, timezone
from pathlib import Path
from typing import cast
from typing import Any, cast

import pandas as pd
from pandas import Index
from rdflib import Graph, Namespace, URIRef
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore

Expand Down Expand Up @@ -350,16 +351,38 @@ def summary(self) -> pd.DataFrame:
def _repr_html_(self) -> str:
provenance = self.provenance._repr_html_()
summary: pd.DataFrame = self.summary
summary_text = (
"<br /><strong>Graph is empty</strong><br />"
if summary.empty
else f"<br /><strong>Graph content</strong><br />{cast(pd.DataFrame, summary)._repr_html_()}" # type: ignore[operator]
)

return (
f"<strong>{type(self).__name__}</strong> A graph store is a container for storing triples. "
"It can be queried and transformed to extract information.<br />"
"<strong>Provenance</strong> Provenance is a record of changes that have occurred in the graph store.<br />"
f"{provenance}"
f"{summary_text}"
if summary.empty:
summary_text = "<br /><strong>Graph is empty</strong><br />"
else:
summary_text = (
"<br /><strong>Overview</strong>:" # type: ignore
f"<ul><li>{len(summary)} types</strong></li>"
f"<li>{sum(summary['Occurrence'])} instances</strong></li></ul>"
f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}"
)

return f"{summary_text}" f"{provenance}"

def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
"""Shorten summary to top 5 types by occurrence."""
top_5_rows = summary.head(5)
last_row = summary.tail(1)

indexes = [
*top_5_rows.index.tolist(),
"...",
*last_row.index.tolist(),
]

shorter_summary = pd.concat(
[
top_5_rows,
pd.DataFrame([["..."] * summary.shape[1]], columns=summary.columns),
last_row,
],
ignore_index=True,
)
shorter_summary.index = cast(Index[Any], indexes)

return shorter_summary
10 changes: 10 additions & 0 deletions cognite/neat/_store/_provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,13 @@ def __delitem__(self, *args, **kwargs):

def __setitem__(self, *args, **kwargs):
raise TypeError("Cannot modify change from provenance")

def _repr_html_(self) -> str:
text = "<br /><strong>Provenance</strong>:<ul>"

for change in self:
text += f"<li>{change.description}</li>"

text += "</ul>"

return text
3 changes: 3 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Changes are grouped as follows:
- Reorg prefixes
- Added more detail regex testing of entities
- Transformation is now generated for every RDF-based rules importer
- Improved session overview in UI

### Added
- Added `NeatSession`
Expand All @@ -36,6 +37,8 @@ Changes are grouped as follows:
- Graph transformer `SplitMultiValueProperty` which splits multi-value properties into separate properties, each with a single value
- Support for `xsd:decimal` which is now mapped to `float64` in DMS rules
- Added RDF based readers for `NeatSession`
- `NeatSession.read.rdf.examples.nordic44`


### Removed
- State on DataType stored in `_dms_loaded` attribute
Expand Down

0 comments on commit 6923f58

Please sign in to comment.