Skip to content

Commit

Permalink
Neat 486 add neat.rdf.examples.nordic44 (#671)
Browse files Browse the repository at this point in the history
* improve stats

* docs

* a bit better title
  • Loading branch information
nikokaoja authored Oct 25, 2024
1 parent 017d294 commit 6923f58
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 62 deletions.
8 changes: 4 additions & 4 deletions cognite/neat/_session/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def infer(
cast(InformationInputRules, input_rules.rules).metadata.name = external_id
cast(InformationInputRules, input_rules.rules).metadata.version = version

self.read._store_rules(self._state.store, input_rules, "Data Model Inference")
self.read.rdf._store_rules(self._state.store, input_rules, "Data Model Inference")
return input_rules.issues

def _repr_html_(self) -> str:
Expand All @@ -72,13 +72,13 @@ def _repr_html_(self) -> str:
if state.input_rules and not state.verified_rules:
metadata = cast(InputComponent, state.input_rule.rules.metadata) # type: ignore[union-attr]
table = pd.DataFrame([metadata.dump()]).T._repr_html_() # type: ignore[operator]
output.append(f"<strong>Raw DataModel</strong><br />{table}")
output.append(f"<H2>Raw Data Model</H2><br />{table}")

if state.verified_rules:
table = pd.DataFrame([state.last_verified_rule.metadata.model_dump()]).T._repr_html_() # type: ignore[operator]
output.append(f"<strong>DataModel</strong><br />{table}")
output.append(f"<H2>Data Model</H2><br />{table}")

if state.has_store:
output.append(f"<strong>Metadata</strong> {state.store._repr_html_()}")
output.append(f"<H2>Instances</H2> {state.store._repr_html_()}")

return "<br />".join(output)
107 changes: 61 additions & 46 deletions cognite/neat/_session/_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

from cognite.client import CogniteClient

from cognite.neat._graph import examples as instances_examples
from cognite.neat._graph import extractors
from cognite.neat._issues import IssueList
from cognite.neat._rules import importers
from cognite.neat._rules._shared import ReadRules
from cognite.neat._store import NeatGraphStore

from ._state import SessionState
from ._wizard import NeatObjectType, RDFFileType, object_wizard, rdf_dm_wizard
Expand All @@ -17,15 +17,64 @@ class ReadAPI:
def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
self._state = state
self._verbose = verbose
self.cdf = CDFReadAPI(state, client)
self.cdf = CDFReadAPI(state, client, verbose)
self.rdf = RDFReadAPI(state, client, verbose)
self.excel = ExcelReadAPI(state, client, verbose)

def excel(self, io: Any) -> IssueList:

class BaseReadAPI:
    """Shared plumbing for the read APIs: session state, CDF client and verbosity flag."""

    def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
        self._state = state
        self._verbose = verbose
        self._client = client

    def _store_rules(self, io: Any, input_rules: ReadRules, source: str) -> None:
        """Record read rules on the session state and, when verbose, report the outcome."""
        if input_rules.rules:
            self._state.input_rules.append(input_rules)
        if self._verbose:
            outcome = "read failed" if input_rules.issues.has_errors else "read successfully"
            print(f"{source} {type(io)} {io} {outcome}")

    def _return_filepath(self, io: Any) -> Path:
        """Coerce *io* to a Path; anything other than str or Path is rejected."""
        if isinstance(io, Path):
            return io
        if isinstance(io, str):
            return Path(io)
        raise ValueError(f"Expected str or Path, got {type(io)}")


class CDFReadAPI(BaseReadAPI): ...


class ExcelReadAPI(BaseReadAPI):
    """Reads a data model from an Excel workbook into the session."""

    def __call__(self, io: Any) -> IssueList:
        """Import rules from the Excel file at *io*; returns any issues encountered."""
        path = self._return_filepath(io)
        rules: ReadRules = importers.ExcelImporter(path).to_rules()
        self._store_rules(io, rules, "Excel")
        return rules.issues

def rdf(

class RDFReadAPI(BaseReadAPI):
def __init__(self, state: SessionState, client: CogniteClient | None, verbose: bool) -> None:
    """Set up RDF reading plus access to the bundled example graphs."""
    self.examples = RDFExamples(state)
    super().__init__(state, client, verbose)

def _ontology(self, io: Any) -> IssueList:
    """Read an OWL ontology file and convert it to rules."""
    source_path = self._return_filepath(io)
    rules: ReadRules = importers.OWLImporter.from_file(source_path).to_rules()
    self._store_rules(io, rules, "Ontology")
    return rules.issues

def _imf(self, io: Any) -> IssueList:
    """Read IMF types from a file and convert them to rules."""
    source_path = self._return_filepath(io)
    rules: ReadRules = importers.IMFImporter.from_file(source_path).to_rules()
    self._store_rules(io, rules, "IMF Types")
    return rules.issues

def __call__(
self,
io: Any,
type: NeatObjectType | None = None,
Expand All @@ -42,53 +91,19 @@ def rdf(
return self._imf(io)
else:
raise ValueError(f"Expected ontology, imf or instances, got {source}")

elif type.lower() == "Instances".lower():
self._state.store.write(extractors.RdfFileExtractor(self._return_filepath(io)))
return IssueList()
else:
raise ValueError(f"Expected data model or instances, got {type}")

def _ontology(self, io: Any) -> IssueList:
    """Read an OWL ontology file and convert it to rules."""
    filepath = self._return_filepath(io)
    input_rules: ReadRules = importers.OWLImporter.from_file(filepath).to_rules()
    self._store_rules(io, input_rules, "Ontology")
    return input_rules.issues

def _imf(self, io: Any) -> IssueList:
    """Read IMF types from a file and convert them to rules."""
    filepath = self._return_filepath(io)
    input_rules: ReadRules = importers.IMFImporter.from_file(filepath).to_rules()
    self._store_rules(io, input_rules, "IMF Types")
    return input_rules.issues

def _inference(self, io: Any) -> IssueList:
    """Infer rules either from an in-memory graph store or from a file on disk."""
    if isinstance(io, NeatGraphStore):
        # Already-loaded graph: infer directly from the store.
        importer = importers.InferenceImporter.from_graph_store(io)
    else:
        # Anything else is treated as path-like; _return_filepath validates it.
        importer = importers.InferenceImporter.from_file(self._return_filepath(io))

    input_rules: ReadRules = importer.to_rules()
    self._store_rules(io, input_rules, "Inference")
    return input_rules.issues

def _return_filepath(self, io: Any) -> Path:
    """Normalize *io* to a Path; only str and Path inputs are accepted."""
    if isinstance(io, str):
        return Path(io)
    elif isinstance(io, Path):
        return io
    else:
        raise ValueError(f"Expected str or Path, got {type(io)}")

def _store_rules(self, io: Any, input_rules: ReadRules, source: str) -> None:
    """Append successfully read rules to session state; when verbose, print the outcome."""
    if input_rules.rules:
        self._state.input_rules.append(input_rules)
    if self._verbose:
        if input_rules.issues.has_errors:
            print(f"{source} {type(io)} {io} read failed")
        else:
            print(f"{source} {type(io)} {io} read successfully")


class CDFReadAPI:
def __init__(self, state: SessionState, client: CogniteClient | None) -> None:
class RDFExamples:
    """Accessors for the example RDF datasets bundled with neat."""

    def __init__(self, state: SessionState) -> None:
        # Fix: a stray `self._client = client` (residue of the removed CDFReadAPI
        # __init__) referenced an undefined name and would raise NameError here.
        self._state = state

    @property
    def nordic44(self) -> IssueList:
        """Load the Nordic 44 example knowledge graph into the session store."""
        self._state.store.write(extractors.RdfFileExtractor(instances_examples.nordic44_knowledge_graph))
        return IssueList()
47 changes: 35 additions & 12 deletions cognite/neat/_store/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from collections.abc import Iterable
from datetime import datetime, timezone
from pathlib import Path
from typing import cast
from typing import Any, cast

import pandas as pd
from pandas import Index
from rdflib import Graph, Namespace, URIRef
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore

Expand Down Expand Up @@ -350,16 +351,38 @@ def summary(self) -> pd.DataFrame:
def _repr_html_(self) -> str:
provenance = self.provenance._repr_html_()
summary: pd.DataFrame = self.summary
summary_text = (
"<br /><strong>Graph is empty</strong><br />"
if summary.empty
else f"<br /><strong>Graph content</strong><br />{cast(pd.DataFrame, summary)._repr_html_()}" # type: ignore[operator]
)

return (
f"<strong>{type(self).__name__}</strong> A graph store is a container for storing triples. "
"It can be queried and transformed to extract information.<br />"
"<strong>Provenance</strong> Provenance is a record of changes that have occurred in the graph store.<br />"
f"{provenance}"
f"{summary_text}"
if summary.empty:
summary_text = "<br /><strong>Graph is empty</strong><br />"
else:
summary_text = (
"<br /><strong>Overview</strong>:" # type: ignore
f"<ul><li>{len(summary)} types</strong></li>"
f"<li>{sum(summary['Occurrence'])} instances</strong></li></ul>"
f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}"
)

return f"{summary_text}" f"{provenance}"

def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
"""Shorten summary to top 5 types by occurrence."""
top_5_rows = summary.head(5)
last_row = summary.tail(1)

indexes = [
*top_5_rows.index.tolist(),
"...",
*last_row.index.tolist(),
]

shorter_summary = pd.concat(
[
top_5_rows,
pd.DataFrame([["..."] * summary.shape[1]], columns=summary.columns),
last_row,
],
ignore_index=True,
)
shorter_summary.index = cast(Index[Any], indexes)

return shorter_summary
10 changes: 10 additions & 0 deletions cognite/neat/_store/_provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,13 @@ def __delitem__(self, *args, **kwargs):

def __setitem__(self, *args, **kwargs):
raise TypeError("Cannot modify change from provenance")

def _repr_html_(self) -> str:
text = "<br /><strong>Provenance</strong>:<ul>"

for change in self:
text += f"<li>{change.description}</li>"

text += "</ul>"

return text
3 changes: 3 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Changes are grouped as follows:
- Reorg prefixes
- Added more detail regex testing of entities
- Transformation is now generated for every RDF-based rules importer
- Improved session overview in UI

### Added
- Added `NeatSession`
Expand All @@ -36,6 +37,8 @@ Changes are grouped as follows:
- Graph transformer `SplitMultiValueProperty` which splits multi-value properties into separate properties, each with a single value
- Support for `xsd:decimal` which is now mapped to `float64` in DMS rules
- Added RDF based readers for `NeatSession`
- `NeatSession.read.rdf.examples.nordic44`


### Removed
- State on DataType stored in `_dms_loaded` attribute
Expand Down

0 comments on commit 6923f58

Please sign in to comment.