Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Norm #83

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
35 changes: 26 additions & 9 deletions mowl/base_models/elmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,14 @@ class EmbeddingELModel(Model):
:type device: str, optional
"""

def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath=None, load_normalized=False, device="cpu"):
def __init__(self,
dataset,
embed_dim,
batch_size,
extended=True,
model_filepath=None,
# load_normalized=False,
device="cpu"):
super().__init__(dataset, model_filepath=model_filepath)

if not isinstance(embed_dim, int):
Expand All @@ -47,8 +54,8 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
if not isinstance(extended, bool):
raise TypeError("Optional parameter extended must be of type bool.")

if not isinstance(load_normalized, bool):
raise TypeError("Optional parameter load_normalized must be of type bool.")
# if not isinstance(load_normalized, bool):
# raise TypeError("Optional parameter load_normalized must be of type bool.")

if not isinstance(device, str):
raise TypeError("Optional parameter device must be of type str.")
Expand All @@ -59,7 +66,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
self.embed_dim = embed_dim
self.batch_size = batch_size
self.device = device
self.load_normalized = load_normalized
# self.load_normalized = load_normalized

self._training_datasets = None
self._validation_datasets = None
Expand All @@ -78,27 +85,37 @@ def _load_datasets(self):
return

training_el_dataset = ELDataset(self.dataset.ontology,
self.dataset.normalized,
self.dataset._normalized_flag,
self.class_index_dict,
self.object_property_index_dict,
extended=self._extended,
load_normalized = self.load_normalized,
# load_normalized = self.load_normalized,
device=self.device)

self._training_datasets = training_el_dataset.get_gci_datasets()

self._validation_datasets = None
if self.dataset.validation:
validation_el_dataset = ELDataset(self.dataset.validation, self.class_index_dict,
validation_el_dataset = ELDataset(self.dataset.validation,
self.dataset._normalized,
self.dataset._normalized_flag,
self.class_index_dict,
self.object_property_index_dict,
extended=self._extended, device=self.device)
extended=self._extended,
device=self.device)

self._validation_datasets = validation_el_dataset.get_gci_datasets()

self._testing_datasets = None
if self.dataset.testing:
testing_el_dataset = ELDataset(self.dataset.testing, self.class_index_dict,
testing_el_dataset = ELDataset(self.dataset.testing,
self.dataset._normalized,
self.dataset._normalized_flag,
self.class_index_dict,
self.object_property_index_dict,
extended=self._extended, device=self.device)
extended=self._extended,
device=self.device)

self._testing_datasets = testing_el_dataset.get_gci_datasets()

Expand Down
28 changes: 26 additions & 2 deletions mowl/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from mowl.owlapi.adapter import OWLAPIAdapter
from mowl.owlapi.defaults import TOP, BOT
from deprecated.sphinx import versionadded, versionchanged
from mowl.ontology.normalize import ELNormalizer


from java.util import HashSet
Expand All @@ -34,7 +35,7 @@ class Dataset():
:type testing: :class:`org.semanticweb.owlapi.model.OWLOntology`, optional
"""

def __init__(self, ontology, validation=None, testing=None):
def __init__(self, ontology, normalized=None, normalized_flag=False, validation=None, testing=None):

if not isinstance(ontology, OWLOntology):
raise TypeError("Parameter ontology must be an OWLOntology.")
Expand All @@ -49,11 +50,16 @@ def __init__(self, ontology, validation=None, testing=None):
self._validation = validation
self._testing = testing

self._normalized = normalized
self._classes = None
self._individuals = None
self._object_properties = None
self._individuals = None
self._evaluation_classes = None
self._normalized_flag = normalized_flag

def normalized_flag(self):
    """Return the current value of the ``_normalized_flag`` attribute.

    The flag is stored by the constructor; presumably it signals that the
    normalized axioms are already available -- confirm against callers.

    :rtype: bool
    """
    flag = self._normalized_flag
    return flag

@property
def ontology(self):
Expand All @@ -79,6 +85,19 @@ def testing(self):
"""
return self._testing

@property
def normalized(self):
    """Normalized axioms of the training ontology, computed lazily.

    If no normalized axioms are stored yet, the training ontology is
    normalized with :class:`ELNormalizer` and the result is cached in
    ``_normalized``; subsequent accesses return the cached object.

    The presence of the cached data, not ``_normalized_flag``, decides
    whether normalization runs. This avoids returning ``None`` when the
    flag was set to ``True`` without any normalized axioms being supplied.

    :return: The object produced by ``ELNormalizer.normalize`` (or the one
        passed to the constructor). It is consumed as a mapping via
        ``.keys()`` elsewhere in this class.
    """
    if self._normalized is None:
        # Nothing cached (or the cache was invalidated): normalize now.
        self._normalized = ELNormalizer().normalize(self._ontology)

    # Keep the flag in sync with the cache for code that still reads it.
    self._normalized_flag = True
    return self._normalized

@property
def classes(self):
"""List of classes in the dataset. The classes are collected from training, validation and
Expand All @@ -91,7 +110,10 @@ def classes(self):
top = adapter.create_class(TOP)
bot = adapter.create_class(BOT)
classes = set([top, bot])
classes |= set(self._ontology.getClassesInSignature())
for k in self.normalized.keys():
for ax in self.normalized[k]:
classes |= set(ax.owl_axiom.getClassesInSignature())
# classes |= set(self._ontology.getClassesInSignature())

if self._validation:
classes |= set(self._validation.getClassesInSignature())
Expand Down Expand Up @@ -202,6 +224,8 @@ def add_axioms(self, *axioms):
self._object_properties = None
self._individuals = None
self._evaluation_classes = None
self._normalized = None
self._normalized_flag = False



Expand Down
15 changes: 11 additions & 4 deletions mowl/datasets/el/el_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from mowl.datasets.gci import GCIDataset, ClassAssertionDataset, ObjectPropertyAssertionDataset
import random
from org.semanticweb.owlapi.model import OWLOntology
from mowl.datasets.base import Dataset


class ELDataset():
Expand Down Expand Up @@ -32,11 +33,13 @@ class ELDataset():

def __init__(self,
ontology,
normalized=None,
normalized_flag=False,
class_index_dict=None,
object_property_index_dict=None,
individual_index_dict=None,
extended=True,
load_normalized = False,
# load_normalized = False,
device="cpu"
):

Expand All @@ -62,13 +65,15 @@ def __init__(self,
raise TypeError("Optional parameter device must be of type str")

self._ontology = ontology
self.normalized = normalized
self.normalized_flag = normalized_flag
self._loaded = False
self._extended = extended
self._class_index_dict = class_index_dict
self._object_property_index_dict = object_property_index_dict
self._individual_index_dict = individual_index_dict
self.device = device
self.load_normalized = load_normalized
# self.load_normalized = load_normalized

self._gci0_dataset = None
self._gci1_dataset = None
Expand All @@ -84,9 +89,11 @@ def load(self):
if self._loaded:
return

normalizer = ELNormalizer()
# normalizer = ELNormalizer()

gcis = normalizer.normalize(self._ontology, load=self.load_normalized)
# gcis = normalizer.normalize(self._ontology, load=self.load_normalized)

gcis = Dataset(ontology=self._ontology, normalized=self.normalized, normalized_flag=self.normalized_flag).normalized

classes = set()
relations = set()
Expand Down
Binary file added mowl/lib/jcel-0.25.0-SNAPSHOT-javadoc.jar
Binary file not shown.
Binary file added mowl/lib/jcel-0.25.0-SNAPSHOT-sources.jar
Binary file not shown.
Binary file added mowl/lib/jcel-0.25.0-SNAPSHOT.jar
Binary file not shown.
File renamed without changes.
9 changes: 6 additions & 3 deletions mowl/ontology/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def normalize(self, ontology, load=False):
ontology = self.preprocess_ontology(ontology)
root_ont = ontology
translator = Translator(ontology.getOWLOntologyManager().getOWLDataFactory(),
IntegerOntologyObjectFactoryImpl())
IntegerOntologyObjectFactoryImpl(6))
# translator = jreasoner.getTranslator()
axioms = HashSet()
axioms.addAll(root_ont.getAxioms())
Expand All @@ -72,7 +72,10 @@ def normalize(self, ontology, load=False):

normalizer = OntologyNormalizer()

factory = IntegerOntologyObjectFactoryImpl()
num_classes = len(list(root_ont.getClassesInSignature()))
num_object_properties = len(list(root_ont.getObjectPropertiesInSignature()))
num_individuals = len(list(root_ont.getIndividualsInSignature()))
factory = IntegerOntologyObjectFactoryImpl(6 + num_classes + num_object_properties + num_individuals + 1)
normalized_ontology = normalizer.normalize(intAxioms, factory)
self.rTranslator = ReverseAxiomTranslator(translator, ontology)

Expand Down Expand Up @@ -197,7 +200,7 @@ def process_axiom(axiom: OWLAxiom):

if type(subclass) == OWLObjectIntersectionOfImpl:
superclass = superclass.toStringID()
if superclass.contains("owl#Nothing"):
if superclass.contains("Nothing"):
return "gci1_bot", GCI1_BOT(axiom)
return "gci1", GCI1(axiom)

Expand Down
8 changes: 4 additions & 4 deletions tests/base_models/test_elmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ def test_constructor_param_types(self):
EmbeddingELModel(self.family_dataset, 1, 1, True, 1)

# optional load_normalized
with self.assertRaisesRegex(TypeError, "Optional parameter load_normalized must be of \
type bool."):
EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", 1)
# with self.assertRaisesRegex(TypeError, "Optional parameter load_normalized must be of \
# type bool."):
# EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", 1)

# optional device
with self.assertRaisesRegex(TypeError, "Optional parameter device must be of type str."):
EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", False, 1)
EmbeddingELModel(self.family_dataset, 1, 1, True, "model_filepath", 1)

def test_class_attribute_training_dataset(self):
"""This should check that the attribute training_datasets is a dictionary of \
Expand Down
Loading