bio-ontology-research-group · ferzcam · Jul 6, 2024 · Jul 30, 2024 · Sep 27, 2024 · Oct 16, 2024
diff --git a/mowl/base_models/elmodel.py b/mowl/base_models/elmodel.py
@@ -35,7 +35,14 @@ class EmbeddingELModel(Model):
     :type device: str, optional
     """
 
-    def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath=None, load_normalized=False, device="cpu"):
+    def __init__(self, 
+                 dataset, 
+                 embed_dim, 
+                 batch_size, 
+                 extended=True, 
+                 model_filepath=None, 
+                 # load_normalized=False, 
+                 device="cpu"):
         super().__init__(dataset, model_filepath=model_filepath)
 
         if not isinstance(embed_dim, int):
@@ -47,8 +54,8 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
         if not isinstance(extended, bool):
             raise TypeError("Optional parameter extended must be of type bool.")
 
-        if not isinstance(load_normalized, bool):
-            raise TypeError("Optional parameter load_normalized must be of type bool.")
+        # if not isinstance(load_normalized, bool):
+        #     raise TypeError("Optional parameter load_normalized must be of type bool.")
 
         if not isinstance(device, str):
             raise TypeError("Optional parameter device must be of type str.")
@@ -59,7 +66,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
         self.embed_dim = embed_dim
         self.batch_size = batch_size
         self.device = device
-        self.load_normalized = load_normalized
+        # self.load_normalized = load_normalized
 
         self._training_datasets = None
         self._validation_datasets = None
@@ -78,27 +85,37 @@ def _load_datasets(self):
             return
 
         training_el_dataset = ELDataset(self.dataset.ontology,
+                                        self.dataset.normalized,
+                                        self.dataset._normalized_flag,
                                         self.class_index_dict,
                                         self.object_property_index_dict,
                                         extended=self._extended,
-                                        load_normalized = self.load_normalized,
+                                        # load_normalized = self.load_normalized,
                                         device=self.device)
 
         self._training_datasets = training_el_dataset.get_gci_datasets()
 
         self._validation_datasets = None
         if self.dataset.validation:
-            validation_el_dataset = ELDataset(self.dataset.validation, self.class_index_dict,
+            validation_el_dataset = ELDataset(self.dataset.validation, 
+                                              self.dataset._normalized,
+                                              self.dataset._normalized_flag,
+                                              self.class_index_dict,
                                               self.object_property_index_dict,
-                                              extended=self._extended, device=self.device)
+                                              extended=self._extended, 
+                                              device=self.device)
 
             self._validation_datasets = validation_el_dataset.get_gci_datasets()
 
         self._testing_datasets = None
         if self.dataset.testing:
-            testing_el_dataset = ELDataset(self.dataset.testing, self.class_index_dict,
+            testing_el_dataset = ELDataset(self.dataset.testing, 
+                                           self.dataset._normalized,
+                                           self.dataset._normalized_flag,
+                                           self.class_index_dict,
                                            self.object_property_index_dict,
-                                           extended=self._extended, device=self.device)
+                                           extended=self._extended, 
+                                           device=self.device)
 
             self._testing_datasets = testing_el_dataset.get_gci_datasets()
 

diff --git a/mowl/datasets/base.py b/mowl/datasets/base.py
@@ -17,6 +17,7 @@
 from mowl.owlapi.adapter import OWLAPIAdapter
 from mowl.owlapi.defaults import TOP, BOT
 from deprecated.sphinx import versionadded, versionchanged
+from mowl.ontology.normalize import ELNormalizer
 
 
 from java.util import HashSet
@@ -34,7 +35,7 @@ class Dataset():
     :type testing: :class:`org.semanticweb.owlapi.model.OWLOntology`, optional
     """
 
-    def __init__(self, ontology, validation=None, testing=None):
+    def __init__(self, ontology, normalized=None, normalized_flag=False, validation=None, testing=None):
 
         if not isinstance(ontology, OWLOntology):
             raise TypeError("Parameter ontology must be an OWLOntology.")
@@ -49,11 +50,16 @@ def __init__(self, ontology, validation=None, testing=None):
         self._validation = validation
         self._testing = testing
 
+        self._normalized = normalized
         self._classes = None
         self._individuals = None
         self._object_properties = None
         self._individuals = None
         self._evaluation_classes = None
+        self._normalized_flag = normalized_flag
+
+    def normalized_flag(self):  # NOTE: a plain method, unlike the @property accessors below; call as normalized_flag()
+        return self._normalized_flag  # initialised from the constructor, set by `normalized`, cleared by `add_axioms`
 
     @property
     def ontology(self):
@@ -79,6 +85,19 @@ def testing(self):
         """
         return self._testing
 
+    @property
+    def normalized(self):
+        """Normalized axioms of the training ontology, computed once and cached.
+
+        :returns: mapping to collections of normalized axioms (presumably keyed
+            by GCI type -- confirm against ``ELNormalizer.normalize``).
+        """
+        # Trust the stored value, not the flag: flag set with nothing stored returned None.
+        if self._normalized is None:
+            self._normalized = ELNormalizer().normalize(self._ontology)
+        self._normalized_flag = True
+        return self._normalized
+
     @property
     def classes(self):
         """List of classes in the dataset. The classes are collected from training, validation and
@@ -91,7 +110,10 @@ def classes(self):
             top = adapter.create_class(TOP)
             bot = adapter.create_class(BOT)
             classes = set([top, bot])
-            classes |= set(self._ontology.getClassesInSignature())
+            for k in self.normalized.keys():
+                for ax in self.normalized[k]:
+                    classes |= set(ax.owl_axiom.getClassesInSignature())
+            # classes |= set(self._ontology.getClassesInSignature())
 
             if self._validation:
                 classes |= set(self._validation.getClassesInSignature())
@@ -202,6 +224,8 @@ def add_axioms(self, *axioms):
         self._object_properties = None
         self._individuals = None
         self._evaluation_classes = None
+        self._normalized = None
+        self._normalized_flag = False
 
 
 

diff --git a/mowl/datasets/el/el_dataset.py b/mowl/datasets/el/el_dataset.py
@@ -4,6 +4,7 @@
 from mowl.datasets.gci import GCIDataset, ClassAssertionDataset, ObjectPropertyAssertionDataset
 import random
 from org.semanticweb.owlapi.model import OWLOntology
+from mowl.datasets.base import Dataset
 
 
 class ELDataset():
@@ -32,11 +33,13 @@ class ELDataset():
 
     def __init__(self,
                  ontology,
+                 normalized=None,
+                 normalized_flag=False,
                  class_index_dict=None,
                  object_property_index_dict=None,
                  individual_index_dict=None,
                  extended=True,
-                 load_normalized = False,
+                 # load_normalized = False,
                  device="cpu"
                  ):
 
@@ -62,13 +65,15 @@ def __init__(self,
             raise TypeError("Optional parameter device must be of type str")
 
         self._ontology = ontology
+        self.normalized = normalized
+        self.normalized_flag = normalized_flag
         self._loaded = False
         self._extended = extended
         self._class_index_dict = class_index_dict
         self._object_property_index_dict = object_property_index_dict
         self._individual_index_dict = individual_index_dict
         self.device = device
-        self.load_normalized = load_normalized
+        # self.load_normalized = load_normalized
 
         self._gci0_dataset = None
         self._gci1_dataset = None
@@ -84,9 +89,11 @@ def load(self):
         if self._loaded:
             return
 
-        normalizer = ELNormalizer()
+        # normalizer = ELNormalizer()
 
-        gcis = normalizer.normalize(self._ontology, load=self.load_normalized)
+        # gcis = normalizer.normalize(self._ontology, load=self.load_normalized)
+
+        gcis = Dataset(ontology=self._ontology, normalized=self.normalized, normalized_flag=self.normalized_flag).normalized
 
         classes = set()
         relations = set()

diff --git a/mowl/lib/jcel-0.25.0-SNAPSHOT-javadoc.jar b/mowl/lib/jcel-0.25.0-SNAPSHOT-javadoc.jar
diff --git a/mowl/lib/jcel-0.25.0-SNAPSHOT-sources.jar b/mowl/lib/jcel-0.25.0-SNAPSHOT-sources.jar
diff --git a/mowl/lib/jcel-0.25.0-SNAPSHOT.jar b/mowl/lib/jcel-0.25.0-SNAPSHOT.jar
diff --git a/mowl/lib/jcel-core-0.24.1.jar → mowl/lib/jcel_old/jcel-core-0.24.1.jar b/mowl/lib/jcel-core-0.24.1.jar → mowl/lib/jcel_old/jcel-core-0.24.1.jar
diff --git a/mowl/lib/jcel-coreontology-0.24.1.jar → ...lib/jcel_old/jcel-coreontology-0.24.1.jar b/mowl/lib/jcel-coreontology-0.24.1.jar → ...lib/jcel_old/jcel-coreontology-0.24.1.jar
diff --git a/mowl/lib/jcel-ontology-0.24.1.jar → mowl/lib/jcel_old/jcel-ontology-0.24.1.jar b/mowl/lib/jcel-ontology-0.24.1.jar → mowl/lib/jcel_old/jcel-ontology-0.24.1.jar
diff --git a/mowl/lib/jcel-owlapi-0.24.1.jar → mowl/lib/jcel_old/jcel-owlapi-0.24.1.jar b/mowl/lib/jcel-owlapi-0.24.1.jar → mowl/lib/jcel_old/jcel-owlapi-0.24.1.jar
diff --git a/mowl/lib/jcel-reasoner-0.24.1.jar → mowl/lib/jcel_old/jcel-reasoner-0.24.1.jar b/mowl/lib/jcel-reasoner-0.24.1.jar → mowl/lib/jcel_old/jcel-reasoner-0.24.1.jar
diff --git a/mowl/ontology/normalize.py b/mowl/ontology/normalize.py
@@ -58,7 +58,7 @@ def normalize(self, ontology, load=False):
             ontology = self.preprocess_ontology(ontology)
             root_ont = ontology
             translator = Translator(ontology.getOWLOntologyManager().getOWLDataFactory(),
-                                    IntegerOntologyObjectFactoryImpl())
+                                    IntegerOntologyObjectFactoryImpl(6))
             # translator = jreasoner.getTranslator()
             axioms = HashSet()
             axioms.addAll(root_ont.getAxioms())
@@ -72,7 +72,10 @@ def normalize(self, ontology, load=False):
 
             normalizer = OntologyNormalizer()
 
-            factory = IntegerOntologyObjectFactoryImpl()
+            num_classes = len(list(root_ont.getClassesInSignature()))
+            num_object_properties = len(list(root_ont.getObjectPropertiesInSignature()))
+            num_individuals = len(list(root_ont.getIndividualsInSignature()))
+            factory = IntegerOntologyObjectFactoryImpl(6 + num_classes + num_object_properties + num_individuals + 1)
             normalized_ontology = normalizer.normalize(intAxioms, factory)
             self.rTranslator = ReverseAxiomTranslator(translator, ontology)
 
@@ -197,7 +200,7 @@ def process_axiom(axiom: OWLAxiom):
 
     if type(subclass) == OWLObjectIntersectionOfImpl:
         superclass = superclass.toStringID()
-        if superclass.contains("owl#Nothing"):
+        if superclass.contains("Nothing"):
             return "gci1_bot", GCI1_BOT(axiom)
         return "gci1", GCI1(axiom)
 

diff --git a/tests/base_models/test_elmodel.py b/tests/base_models/test_elmodel.py
@@ -46,13 +46,13 @@ def test_constructor_param_types(self):
             EmbeddingELModel(self.family_dataset, 1, 1, True, 1)
 
         # optional load_normalized
-        with self.assertRaisesRegex(TypeError, "Optional parameter load_normalized must be of \
-type bool."):
-            EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", 1)
+#         with self.assertRaisesRegex(TypeError, "Optional parameter load_normalized must be of \
+# type bool."):
+#             EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", 1)
 
         # optional device
         with self.assertRaisesRegex(TypeError, "Optional parameter device must be of type str."):
-            EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", False, 1)
+            EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", 1)
 
     def test_class_attribute_training_dataset(self):
         """This should check that the attribute training_datasets is a dictionary of \