From 726cea6e04a00eeaad881774b14b5e7119dea1ce Mon Sep 17 00:00:00 2001 From: Fernando Zhapa-Camacho Date: Thu, 21 Nov 2024 13:46:16 +0300 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Support=20for=20individuals=20in=20?= =?UTF-8?q?EL=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.rst | 9 - mowl/base_models/elmodel.py | 2 +- mowl/datasets/gci.py | 2 +- mowl/models/__init__.py | 6 +- mowl/models/boxsquaredel/model.py | 109 +++++++++-- .../{elboxembeddings => elbe}/__init__.py | 0 .../{elboxembeddings => elbe}/evaluate.py | 0 .../examples/__init__.py | 0 .../examples/model_gda.py | 4 +- .../examples/model_ppi.py | 4 +- mowl/models/elbe/model.py | 156 ++++++++++++++++ mowl/models/elboxembeddings/model.py | 73 -------- mowl/models/elembeddings/model.py | 89 ++++++++- mowl/nn/__init__.py | 2 +- mowl/nn/el/boxsquaredel/losses.py | 173 ++++++------------ mowl/nn/el/boxsquaredel/module.py | 25 ++- mowl/nn/el/elbe/losses.py | 83 ++++----- mowl/nn/el/elbe/module.py | 22 +-- mowl/nn/el/elem/losses.py | 92 +++++----- mowl/nn/el/elem/module.py | 18 +- setup.py | 2 +- tests/base_models/test_elmodel.py | 62 +++---- tests/nn/test_boxsquaredel_module.py | 3 +- ...st_elbox_module.py => test_elbe_module.py} | 7 +- tests/nn/test_elem_module.py | 3 +- 25 files changed, 550 insertions(+), 396 deletions(-) delete mode 100644 README.rst rename mowl/models/{elboxembeddings => elbe}/__init__.py (100%) rename mowl/models/{elboxembeddings => elbe}/evaluate.py (100%) rename mowl/models/{elboxembeddings => elbe}/examples/__init__.py (100%) rename mowl/models/{elboxembeddings => elbe}/examples/model_gda.py (97%) rename mowl/models/{elboxembeddings => elbe}/examples/model_ppi.py (97%) create mode 100644 mowl/models/elbe/model.py delete mode 100644 mowl/models/elboxembeddings/model.py rename tests/nn/{test_elbox_module.py => test_elbe_module.py} (92%) diff --git a/README.rst b/README.rst deleted file mode 100644 index 07eac7fd..00000000 --- a/README.rst +++ /dev/null @@ -1,9 +0,0 @@ -Template for the Read the Docs tutorial -======================================= - -This GitHub template includes fictional Python library -with some basic Sphinx docs. - -Read the tutorial here: - -https://docs.readthedocs.io/en/stable/tutorial/ diff --git a/mowl/base_models/elmodel.py b/mowl/base_models/elmodel.py index f654e020..c63212eb 100644 --- a/mowl/base_models/elmodel.py +++ b/mowl/base_models/elmodel.py @@ -396,7 +396,7 @@ def from_pretrained(self, model): if not isinstance(model, str): raise TypeError - self.module.load_state_dict(th.load(model)) + self.module.load_state_dict(th.load(model, weights_only=True)) #self._kge_method = kge_method diff --git a/mowl/datasets/gci.py b/mowl/datasets/gci.py index 23665183..ebc35f10 100644 --- a/mowl/datasets/gci.py +++ b/mowl/datasets/gci.py @@ -64,7 +64,7 @@ def push_to_device(self, data): for gci in data: class_ = self.class_index_dict[gci.class_] individual = self.individual_index_dict[gci.individual] - pretensor.append([class_, individual]) + pretensor.append([individual, class_]) tensor = th.tensor(pretensor).to(self.device) return tensor diff --git a/mowl/models/__init__.py b/mowl/models/__init__.py index bfb43f7d..1cb673b4 100644 --- a/mowl/models/__init__.py +++ b/mowl/models/__init__.py @@ -2,9 +2,9 @@ from mowl.models.elembeddings.examples.model_ppi import ELEmPPI from mowl.models.elembeddings.examples.model_gda import ELEmGDA -from mowl.models.elboxembeddings.model import ELBoxEmbeddings -from mowl.models.elboxembeddings.examples.model_ppi import ELBoxPPI -from mowl.models.elboxembeddings.examples.model_gda import ELBoxGDA +from mowl.models.elbe.model import ELBoxEmbeddings, ELBE +from mowl.models.elbe.examples.model_ppi import ELBEPPI +from mowl.models.elbe.examples.model_gda import ELBEGDA from mowl.models.boxsquaredel.model import BoxSquaredEL diff --git a/mowl/models/boxsquaredel/model.py b/mowl/models/boxsquaredel/model.py index adcdefc6..f046a946 100644 --- a/mowl/models/boxsquaredel/model.py +++ b/mowl/models/boxsquaredel/model.py @@ -1,14 +1,19 @@ - -from mowl.nn import BoxSquaredELModule from mowl.base_models.elmodel import EmbeddingELModel - +from mowl.nn import BoxSquaredELModule +from tqdm import trange, tqdm import torch as th -from torch import nn +import numpy as np +import logging + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) class BoxSquaredEL(EmbeddingELModel): """ - Implementation based on [peng2020]_. + Implementation based on [jackermeier2023]_. """ def __init__(self, @@ -27,7 +32,6 @@ def __init__(self, ): super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath) - self.margin = margin self.reg_norm = reg_norm self.delta = delta @@ -44,6 +48,7 @@ def init_module(self): self.module = BoxSquaredELModule( len(self.class_index_dict), len(self.object_property_index_dict), + len(self.individual_index_dict), embed_dim=self.embed_dim, gamma=self.margin, delta=self.delta, @@ -51,12 +56,78 @@ def init_module(self): ).to(self.device) - def train(self): - raise NotImplementedError + def train(self, epochs=None, validate_every=1): + logger.warning('You are using the default training method. If you want to use a cutomized training method (e.g., different negative sampling, etc.), please reimplement the train method in a subclass.') + + points_per_dataset = {k: len(v) for k, v in self.training_datasets.items()} + string = "Training datasets: \n" + for k, v in points_per_dataset.items(): + string += f"\t{k}: {v}\n" + + logger.info(string) + + optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate) + best_loss = float('inf') + + all_classes_ids = list(self.class_index_dict.values()) + all_inds_ids = list(self.individual_index_dict.values()) + + if epochs is None: + epochs = self.epochs + + for epoch in trange(epochs): + self.module.train() + + train_loss = 0 + loss = 0 + + for gci_name, gci_dataset in self.training_datasets.items(): + if len(gci_dataset) == 0: + continue + + loss += th.mean(self.module(gci_dataset[:], gci_name)) + if gci_name == "gci2": + idxs_for_negs = np.random.choice(all_classes_ids, size=len(gci_dataset), replace=True) + rand_index = th.tensor(idxs_for_negs).to(self.device) + data = gci_dataset[:] + neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1) + loss += th.mean(self.module(neg_data, gci_name, neg=True)) + + if gci_name == "object_property_assertion": + idxs_for_negs = np.random.choice(all_inds_ids, size=len(gci_dataset), replace=True) + rand_index = th.tensor(idxs_for_negs).to(self.device) + data = gci_dataset[:] + neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1) + loss += th.mean(self.module(neg_data, gci_name, neg=True)) + + loss += self.module.regularization_loss() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + train_loss += loss.detach().item() + + loss = 0 + + if (epoch + 1) % validate_every == 0: + if self.dataset.validation is not None: + with th.no_grad(): + self.module.eval() + valid_loss = 0 + gci2_data = self.validation_datasets["gci2"][:] + loss = th.mean(self.module(gci2_data, "gci2")) + valid_loss += loss.detach().item() + - + if valid_loss < best_loss: + best_loss = valid_loss + th.save(self.module.state_dict(), self.model_filepath) + print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}') + else: + print(f'Epoch {epoch+1}: Train loss: {train_loss}') + def eval_method(self, data): - return self.module.gci2_score(data) + return self.module.gci2_loss(data) def get_embeddings(self): self.init_module() @@ -64,11 +135,19 @@ def get_embeddings(self): print('Load the best model', self.model_filepath) self.load_best_model() - ent_embeds = {k: v for k, v in zip(self.class_index_dict.keys(), - self.module.class_embed.weight.cpu().detach().numpy())} - rel_embeds = {k: v for k, v in zip(self.object_property_index_dict.keys(), - self.module.rel_embed.weight.cpu().detach().numpy())} - return ent_embeds, rel_embeds + ent_embeds = { + k: v for k, v in zip(self.class_index_dict.keys(), + self.module.class_embed.weight.cpu().detach().numpy())} + rel_embeds = { + k: v for k, v in zip(self.object_property_index_dict.keys(), + self.module.rel_embed.weight.cpu().detach().numpy())} + if self.module.ind_embed is not None: + ind_embeds = { + k: v for k, v in zip(self.individual_index_dict.keys(), + self.module.ind_embed.weight.cpu().detach().numpy())} + else: + ind_embeds = None + return ent_embeds, rel_embeds, ind_embeds def load_best_model(self): self.init_module() diff --git a/mowl/models/elboxembeddings/__init__.py b/mowl/models/elbe/__init__.py similarity index 100% rename from mowl/models/elboxembeddings/__init__.py rename to mowl/models/elbe/__init__.py diff --git a/mowl/models/elboxembeddings/evaluate.py b/mowl/models/elbe/evaluate.py similarity index 100% rename from mowl/models/elboxembeddings/evaluate.py rename to mowl/models/elbe/evaluate.py diff --git a/mowl/models/elboxembeddings/examples/__init__.py b/mowl/models/elbe/examples/__init__.py similarity index 100% rename from mowl/models/elboxembeddings/examples/__init__.py rename to mowl/models/elbe/examples/__init__.py diff --git a/mowl/models/elboxembeddings/examples/model_gda.py b/mowl/models/elbe/examples/model_gda.py similarity index 97% rename from mowl/models/elboxembeddings/examples/model_gda.py rename to mowl/models/elbe/examples/model_gda.py index 04c176c5..102eb6ee 100644 --- a/mowl/models/elboxembeddings/examples/model_gda.py +++ b/mowl/models/elbe/examples/model_gda.py @@ -1,4 +1,4 @@ -from mowl.models import ELBoxEmbeddings +from mowl.models import ELBE from mowl.projection.factory import projector_factory from mowl.projection.edge import Edge @@ -12,7 +12,7 @@ from torch import nn -class ELBoxGDA(ELBoxEmbeddings): +class ELBEGDA(ELBE): """ Example of ELBoxEmbeddings for gene-disease associations prediction. """ diff --git a/mowl/models/elboxembeddings/examples/model_ppi.py b/mowl/models/elbe/examples/model_ppi.py similarity index 97% rename from mowl/models/elboxembeddings/examples/model_ppi.py rename to mowl/models/elbe/examples/model_ppi.py index 5b376d18..3b6cdde4 100644 --- a/mowl/models/elboxembeddings/examples/model_ppi.py +++ b/mowl/models/elbe/examples/model_ppi.py @@ -1,4 +1,4 @@ -from mowl.models import ELBoxEmbeddings +from mowl.models import ELBE from mowl.projection.factory import projector_factory from mowl.projection.edge import Edge import math @@ -13,7 +13,7 @@ from torch import nn -class ELBoxPPI(ELBoxEmbeddings): +class ELBEPPI(ELBE): """ Example of ELBoxEmbeddings for protein-protein interaction prediction. """ diff --git a/mowl/models/elbe/model.py b/mowl/models/elbe/model.py new file mode 100644 index 00000000..1185c9a5 --- /dev/null +++ b/mowl/models/elbe/model.py @@ -0,0 +1,156 @@ +from mowl.base_models.elmodel import EmbeddingELModel +from mowl.nn import ELBEModule +from tqdm import trange, tqdm +import torch as th +import numpy as np +from deprecated.sphinx import deprecated +import logging + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + +class ELBE(EmbeddingELModel): + """ + Implementation based on [peng2020]_. + """ + + def __init__(self, + dataset, + embed_dim=50, + margin=0, + reg_norm=1, + learning_rate=0.001, + epochs=1000, + batch_size=4096 * 8, + model_filepath=None, + device='cpu' + ): + super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath) + + self.margin = margin + self.reg_norm = reg_norm + self.learning_rate = learning_rate + self.epochs = epochs + self.device = device + self._loaded = False + self.extended = False + self.init_module() + + def init_module(self): + self.module = ELBEModule( + len(self.class_index_dict), + len(self.object_property_index_dict), + len(self.individual_index_dict), + embed_dim=self.embed_dim, + margin=self.margin + ).to(self.device) + + def train(self, epochs=None, validate_every=1): + logger.warning('You are using the default training method. If you want to use a cutomized training method (e.g., different negative sampling, etc.), please reimplement the train method in a subclass.') + + points_per_dataset = {k: len(v) for k, v in self.training_datasets.items()} + string = "Training datasets: \n" + for k, v in points_per_dataset.items(): + string += f"\t{k}: {v}\n" + + logger.info(string) + + optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate) + criterion = th.nn.MSELoss() + best_loss = float('inf') + + all_classes_ids = list(self.class_index_dict.values()) + all_inds_ids = list(self.individual_index_dict.values()) + + if epochs is None: + epochs = self.epochs + + for epoch in trange(epochs): + self.module.train() + + train_loss = 0 + loss = 0 + + for gci_name, gci_dataset in self.training_datasets.items(): + if len(gci_dataset) == 0: + continue + + scores = th.mean(self.module(gci_dataset[:], gci_name)) + loss += criterion(scores, th.zeros_like(scores, requires_grad=False)) + + if gci_name == "gci2": + idxs_for_negs = np.random.choice(all_classes_ids, size=len(gci_dataset), replace=True) + rand_index = th.tensor(idxs_for_negs).to(self.device) + data = gci_dataset[:] + neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1) + scores = th.mean(self.module(neg_data, gci_name, neg=True)) + loss += criterion(scores, th.ones_like(scores, requires_grad=False)) + + if gci_name == "object_property_assertion": + idxs_for_negs = np.random.choice(all_inds_ids, size=len(gci_dataset), replace=True) + rand_index = th.tensor(idxs_for_negs).to(self.device) + data = gci_dataset[:] + neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1) + scores = th.mean(self.module(neg_data, gci_name, neg=True)) + loss += criterion(scores, th.ones_like(scores, requires_grad=False)) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + train_loss += loss.detach().item() + + loss = 0 + + if (epoch + 1) % validate_every == 0: + if self.dataset.validation is not None: + with th.no_grad(): + self.module.eval() + valid_loss = 0 + gci2_data = self.validation_datasets["gci2"][:] + loss = th.mean(self.module(gci2_data, "gci2")) + valid_loss += loss.detach().item() + + + if valid_loss < best_loss: + best_loss = valid_loss + th.save(self.module.state_dict(), self.model_filepath) + print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}') + else: + print(f'Epoch {epoch+1}: Train loss: {train_loss}') + + def eval_method(self, data): + return self.module.gci2_loss(data) + + def get_embeddings(self): + self.init_module() + + print('Load the best model', self.model_filepath) + self.load_best_model() + + ent_embeds = { + k: v for k, v in zip(self.class_index_dict.keys(), + self.module.class_embed.weight.cpu().detach().numpy())} + rel_embeds = { + k: v for k, v in zip(self.object_property_index_dict.keys(), + self.module.rel_embed.weight.cpu().detach().numpy())} + if self.module.ind_embed is not None: + ind_embeds = { + k: v for k, v in zip(self.individual_index_dict.keys(), + self.module.ind_embed.weight.cpu().detach().numpy())} + else: + ind_embeds = None + return ent_embeds, rel_embeds, ind_embeds + + def load_best_model(self): + self.init_module() + self.module.load_state_dict(th.load(self.model_filepath)) + self.module.eval() + + +@deprecated(version='1.0.2', reason="Use ELBoxEmbeddings instead.") +class ELBoxEmbeddings(ELBE): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + diff --git a/mowl/models/elboxembeddings/model.py b/mowl/models/elboxembeddings/model.py deleted file mode 100644 index 8011bd8b..00000000 --- a/mowl/models/elboxembeddings/model.py +++ /dev/null @@ -1,73 +0,0 @@ - -from mowl.nn import ELBoxModule -from mowl.base_models.elmodel import EmbeddingELModel -from mowl.evaluation import PPIEvaluator - -import torch as th -from torch import nn - - -class ELBoxEmbeddings(EmbeddingELModel): - """ - Implementation based on [peng2020]_. - """ - - def __init__(self, - dataset, - embed_dim=50, - margin=0, - reg_norm=1, - learning_rate=0.001, - epochs=1000, - batch_size=4096 * 8, - model_filepath=None, - device='cpu' - ): - super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath) - - - self.margin = margin - self.reg_norm = reg_norm - self.learning_rate = learning_rate - self.epochs = epochs - self.device = device - self._loaded = False - self.extended = False - self.init_module() - - self.set_evaluator(PPIEvaluator) - - def init_module(self): - self.module = ELBoxModule( - len(self.class_index_dict), - len(self.object_property_index_dict), - embed_dim=self.embed_dim, - margin=self.margin - ).to(self.device) - - def train(self): - raise NotImplementedError - - - def eval_method(self, data): - return self.module.gci2_loss(data) - - - def get_embeddings(self): - self.init_module() - - print('Load the best model', self.model_filepath) - self.load_best_model() - - ent_embeds = {k: v for k, v in zip(self.class_index_dict.keys(), - self.module.class_embed.weight.cpu().detach().numpy())} - rel_embeds = {k: v for k, v in zip(self.object_property_index_dict.keys(), - self.module.rel_embed.weight.cpu().detach().numpy())} - return ent_embeds, rel_embeds - - def load_best_model(self): - self.init_module() - self.module.load_state_dict(th.load(self.model_filepath)) - self.module.eval() - - diff --git a/mowl/models/elembeddings/model.py b/mowl/models/elembeddings/model.py index c593d9f8..82f2b4ee 100644 --- a/mowl/models/elembeddings/model.py +++ b/mowl/models/elembeddings/model.py @@ -1,10 +1,15 @@ - - from mowl.base_models.elmodel import EmbeddingELModel from mowl.nn import ELEmModule from tqdm import trange, tqdm import torch as th import numpy as np +import logging + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + class ELEmbeddings(EmbeddingELModel): """ @@ -43,12 +48,80 @@ def init_module(self): self.module = ELEmModule( len(self.class_index_dict), # number of ontology classes len(self.object_property_index_dict), # number of ontology object properties + len(self.individual_index_dict), # number of individuals embed_dim=self.embed_dim, margin=self.margin ).to(self.device) - def train(self): - raise NotImplementedError + def train(self, epochs=None, validate_every=1): + logger.warning('You are using the default training method. If you want to use a cutomized training method (e.g., different negative sampling, etc.), please reimplement the train method in a subclass.') + + points_per_dataset = {k: len(v) for k, v in self.training_datasets.items()} + string = "Training datasets: \n" + for k, v in points_per_dataset.items(): + string += f"\t{k}: {v}\n" + + logger.info(string) + + optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate) + best_loss = float('inf') + + all_classes_ids = list(self.class_index_dict.values()) + all_inds_ids = list(self.individual_index_dict.values()) + + if epochs is None: + epochs = self.epochs + + for epoch in trange(epochs): + self.module.train() + + train_loss = 0 + loss = 0 + + for gci_name, gci_dataset in self.training_datasets.items(): + if len(gci_dataset) == 0: + continue + + loss += th.mean(self.module(gci_dataset[:], gci_name)) + if gci_name == "gci2": + idxs_for_negs = np.random.choice(all_classes_ids, size=len(gci_dataset), replace=True) + rand_index = th.tensor(idxs_for_negs).to(self.device) + data = gci_dataset[:] + neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1) + loss += th.mean(self.module(neg_data, gci_name, neg=True)) + + if gci_name == "object_property_assertion": + idxs_for_negs = np.random.choice(all_inds_ids, size=len(gci_dataset), replace=True) + rand_index = th.tensor(idxs_for_negs).to(self.device) + data = gci_dataset[:] + neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1) + loss += th.mean(self.module(neg_data, gci_name, neg=True)) + + loss += self.module.regularization_loss() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + train_loss += loss.detach().item() + + loss = 0 + + if (epoch + 1) % validate_every == 0: + if self.dataset.validation is not None: + with th.no_grad(): + self.module.eval() + valid_loss = 0 + gci2_data = self.validation_datasets["gci2"][:] + loss = th.mean(self.module(gci2_data, "gci2")) + valid_loss += loss.detach().item() + + + if valid_loss < best_loss: + best_loss = valid_loss + th.save(self.module.state_dict(), self.model_filepath) + print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}') + else: + print(f'Epoch {epoch+1}: Train loss: {train_loss}') def eval_method(self, data): return self.module.gci2_loss(data) @@ -65,7 +138,13 @@ def get_embeddings(self): rel_embeds = { k: v for k, v in zip(self.object_property_index_dict.keys(), self.module.rel_embed.weight.cpu().detach().numpy())} - return ent_embeds, rel_embeds + if self.module.ind_embed is not None: + ind_embeds = { + k: v for k, v in zip(self.individual_index_dict.keys(), + self.module.ind_embed.weight.cpu().detach().numpy())} + else: + ind_embeds = None + return ent_embeds, rel_embeds, ind_embeds def load_best_model(self): self.init_module() diff --git a/mowl/nn/__init__.py b/mowl/nn/__init__.py index 3e93ef87..8a59d415 100644 --- a/mowl/nn/__init__.py +++ b/mowl/nn/__init__.py @@ -1,5 +1,5 @@ from .el.elmodule import ELModule from .el.elem.module import ELEmModule -from .el.elbe.module import ELBoxModule, ELBEModule +from .el.elbe.module import ELBEModule from .el.boxel.module import BoxELModule from .el.boxsquaredel.module import BoxSquaredELModule diff --git a/mowl/nn/el/boxsquaredel/losses.py b/mowl/nn/el/boxsquaredel/losses.py index 83077925..b04f3a7a 100644 --- a/mowl/nn/el/boxsquaredel/losses.py +++ b/mowl/nn/el/boxsquaredel/losses.py @@ -24,96 +24,36 @@ def inclusion_score(box_a, box_b, gamma): score = th.linalg.norm(th.relu(dist_a_b + 2*offset_a - gamma), dim=1) return score -def class_assertion_loss(data, class_center, class_offset, ind_center, gamma, neg = False): - center_c = class_center(data[:, 0]) - offset_c = th.abs(class_offset(data[:, 0])) - center_ind = ind_center(data[:, 1]) - - box_c = (center_c, offset_c) - box_d = (center_ind, th.zeros_like(offset_c, device=offset_c.device)) - score = inclusion_score(box_d, box_c, gamma) - return score - - -def object_property_assertion_score(data, head_center, head_offset, tail_center, tail_offset, ind_center, bump_individuals, gamma, delta): - center_c = ind_center(data[:, 0]) - - center_head = head_center(data[:, 1]) - offset_head = th.abs(head_offset(data[:, 1])) - - center_tail = tail_center(data[:, 1]) - offset_tail = th.abs(tail_offset(data[:, 1])) - - center_d = ind_center(data[:, 2]) +def class_assertion_loss(data, ind_center, ind_offset, class_center, class_offset, gamma, neg = False): + center_i = ind_center(data[:, 0]) + offset_i = th.abs(ind_offset(data[:, 0])) + center_c = class_center(data[:, 1]) + offset_c = th.abs(class_offset(data[:, 1])) - offset_c = th.zeros_like(offset_head, device=offset_head.device) - offset_d = th.zeros_like(offset_head, device=offset_head.device) - - bump_c = bump_individuals(data[:, 0]) - bump_d = bump_individuals(data[:, 2]) - box_c = (center_c, offset_c) - box_head = (center_head, offset_head) - box_tail = (center_tail, offset_tail) - box_d = (center_d, offset_d) - - bumped_c = (center_c + bump_d, offset_c) - bumped_d = (center_d + bump_c, offset_d) - - inclussion_1 = inclusion_score(bumped_c, box_head, gamma) - inclussion_2 = inclusion_score(bumped_d, box_tail, gamma) - - score = (inclussion_1 + inclussion_2)/2 + box_d = (center_i, offset_i) + score = inclusion_score(box_d, box_c, gamma) return score -def object_property_assertion_loss(data, head_center, head_offset, tail_center, tail_offset, ind_center, bump_individuals, gamma, delta, reg_factor, neg=False): +def object_property_assertion_loss(data, ind_center, ind_offset, head_center, head_offset, tail_center, tail_offset, bump_individuals, gamma, delta, neg=False): if neg: - return object_property_assertion_loss_neg(data, head_center, head_offset, tail_center, tail_offset, ind_center, bump_individuals, gamma, delta, reg_factor) + fn = gci2_score_neg else: - score = object_property_assertion_score(data, head_center, head_offset, tail_center, tail_offset, ind_center, bump_individuals, gamma, delta) - loss = score.square() - reg_loss = 0#reg_factor * th.linalg.norm(bump.weight, dim=1) - return loss + reg_loss - - -def object_property_assertion_loss_neg(data, head_center, head_offset, tail_center, tail_offset, ind_center, bump_individuals, gamma, delta, reg_factor): + fn = gci2_score - def minimal_distance(box_a, box_b, gamma): - dist = box_distance(box_a, box_b) - min_dist = th.linalg.norm(th.relu(dist + gamma), dim=1) - return min_dist + center_i1 = ind_center(data[:, 0]) + offset_i1 = th.abs(ind_offset(data[:, 0])) + bump_i1 = bump_individuals(data[:, 0]) + center_i2 = ind_center(data[:, 2]) + offset_i2 = th.abs(ind_offset(data[:, 2])) + bump_i2 = bump_individuals(data[:, 2]) - center_c = ind_center(data[:, 0]) - center_head = head_center(data[:, 1]) offset_head = th.abs(head_offset(data[:, 1])) center_tail = tail_center(data[:, 1]) offset_tail = th.abs(tail_offset(data[:, 1])) - center_d = ind_center(data[:, 2]) - - offset_c = th.zeros_like(offset_head, device=offset_head.device) - offset_d = th.zeros_like(offset_head, device=offset_head.device) - - bump_c = bump(data[:, 0]) - bump_d = bump(data[:, 2]) - - box_c = (center_c, offset_c) - box_head = (center_head, offset_head) - box_tail = (center_tail, offset_tail) - box_d = (center_d, offset_d) - - bumped_c = (center_c + bump_d, offset_c) - bumped_d = (center_d + bump_c, offset_d) - - first_part = (delta - minimal_distance(bumped_c, box_head, gamma)).square() - second_part = (delta - minimal_distance(bumped_d, box_tail, gamma)).square() - - loss = first_part + second_part - reg_loss = 0#reg_factor * th.linalg.norm(bump.weight, dim=1) - return loss + reg_loss - - + return fn(center_i1, offset_i1, bump_i1, center_i2, offset_i2, bump_i2, center_head, offset_head, center_tail, offset_tail, gamma, delta) def gci0_score(data, class_center, class_offset, gamma): center_c = class_center(data[:, 0]) @@ -184,22 +124,12 @@ def gci1_bot_loss(data, class_center, class_offset, gamma, neg=False): loss = score.square() return loss -def gci2_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta): - center_c = class_center(data[:, 0]) - offset_c = th.abs(class_offset(data[:, 0])) - - center_head = head_center(data[:, 1]) - offset_head = th.abs(head_offset(data[:, 1])) - - center_tail = tail_center(data[:, 1]) - offset_tail = th.abs(tail_offset(data[:, 1])) - - center_d = class_center(data[:, 2]) - offset_d = th.abs(class_offset(data[:, 2])) - - bump_c = bump(data[:, 0]) - bump_d = bump(data[:, 2]) - +def gci2_score(center_c, offset_c, bump_c, + center_d, offset_d, bump_d, + center_head, offset_head, + center_tail, offset_tail, + gamma, delta): + box_c = (center_c, offset_c) box_head = (center_head, offset_head) box_tail = (center_tail, offset_tail) @@ -215,34 +145,17 @@ def gci2_score(data, class_center, class_offset, head_center, head_offset, tail_ return score -def gci2_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor, neg=False): - if neg: - return gci2_loss_neg(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor) - else: - score = gci2_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta) - loss = score.square() - reg_loss = 0#reg_factor * th.linalg.norm(bump.weight, dim=1) - return loss + reg_loss - - -def gci2_loss_neg(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor): +def gci2_score_neg(center_c, offset_c, bump_c, + center_d, offset_d, bump_d, + center_head, offset_head, + center_tail, offset_tail, + gamma, delta): def minimal_distance(box_a, box_b, gamma): dist = box_distance(box_a, box_b) min_dist = th.linalg.norm(th.relu(dist + gamma), dim=1) return min_dist - - center_c = class_center(data[:, 0]) - offset_c = th.abs(class_offset(data[:, 0])) - center_head = head_center(data[:, 1]) - offset_head = th.abs(head_offset(data[:, 1])) - center_tail = tail_center(data[:, 1]) - offset_tail = th.abs(tail_offset(data[:, 1])) - center_d = class_center(data[:, 2]) - offset_d = th.abs(class_offset(data[:, 2])) - bump_c = bump(data[:, 0]) - bump_d = bump(data[:, 2]) - + box_c = (center_c, offset_c) box_head = (center_head, offset_head) box_tail = (center_tail, offset_tail) @@ -255,9 +168,27 @@ def minimal_distance(box_a, box_b, gamma): second_part = (delta - minimal_distance(bumped_d, box_tail, gamma)).square() loss = first_part + second_part - reg_loss = 0#reg_factor * th.linalg.norm(bump.weight, dim=1) - return loss + reg_loss + return loss +def gci2_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, neg=False): + if neg: + fn = gci2_score_neg + else: + fn = gci2_score + + center_c = class_center(data[:, 0]) + offset_c = th.abs(class_offset(data[:, 0])) + bump_c = bump(data[:, 0]) + center_d = class_center(data[:, 2]) + offset_d = th.abs(class_offset(data[:, 2])) + bump_d = bump(data[:, 2]) + + center_head = head_center(data[:, 1]) + offset_head = th.abs(head_offset(data[:, 1])) + center_tail = tail_center(data[:, 1]) + offset_tail = th.abs(tail_offset(data[:, 1])) + + return fn(center_c, offset_c, bump_c, center_d, offset_d, bump_d, center_head, offset_head, center_tail, offset_tail, gamma, delta) def gci3_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma): center_d = class_center(data[:, 2]) @@ -273,12 +204,10 @@ def gci3_score(data, class_center, class_offset, head_center, head_offset, tail_ score = inclusion_score(bumped_head, box_d, gamma) return score -def gci3_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, reg_factor, neg=False): +def gci3_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, neg=False): score = gci3_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma) loss = score.square() - reg_loss =0# reg_factor * th.linalg.norm(bump.weight, dim=1) - - return loss + reg_loss + return loss def gci3_bot_score(data, head_offset): diff --git a/mowl/nn/el/boxsquaredel/module.py b/mowl/nn/el/boxsquaredel/module.py index 2daf9727..c34f806b 100644 --- a/mowl/nn/el/boxsquaredel/module.py +++ b/mowl/nn/el/boxsquaredel/module.py @@ -25,15 +25,17 @@ def __init__(self, nb_ont_classes, nb_rels, nb_inds=None, embed_dim=50, gamma=0, self.tail_center = self.init_embeddings(nb_rels, embed_dim) self.tail_offset = self.init_embeddings(nb_rels, embed_dim) - self.bump = self.init_embeddings(nb_ont_classes, embed_dim) + self.bump_classes = self.init_embeddings(nb_ont_classes, embed_dim) if self.nb_inds is not None and self.nb_inds > 0: self.bump_individuals = self.init_embeddings(nb_inds, embed_dim) self.ind_center = self.init_embeddings(nb_inds, embed_dim) - + self.ind_offset = self.init_embeddings(nb_inds, embed_dim) + else: self.bump_individuals = None self.ind_center = None + self.ind_offset = None self.gamma = gamma self.delta = delta @@ -59,35 +61,30 @@ def gci1_bot_loss(self, data, neg=False): def gci2_loss(self, data, neg=False): return L.gci2_loss(data, self.class_center, self.class_offset, self.head_center, - self.head_offset, self.tail_center, self.tail_offset, self.bump, - self.gamma, self.delta, self.reg_factor, neg=neg) + self.head_offset, self.tail_center, self.tail_offset, self.bump_classes, + self.gamma, self.delta, neg=neg) def gci3_loss(self, data, neg=False): return L.gci3_loss(data, self.class_center, self.class_offset, self.head_center, - self.head_offset, self.tail_center, self.tail_offset, self.bump, - self.gamma, self.reg_factor, neg=neg) + self.head_offset, self.tail_center, self.tail_offset, self.bump_classes, + self.gamma, neg=neg) def gci3_bot_loss(self, data, neg=False): return L.gci3_bot_loss(data, self.head_offset) - def gci2_score(self, data): - return L.gci2_score(data, self.class_center, self.class_offset, self.head_center, - self.head_offset, self.tail_center, self.tail_offset, self.bump, - self.gamma, self.delta) - def class_assertion_loss(self, data, neg=False): if self.ind_center is None: raise ValueError("The number of individuals must be specified to use this loss function.") - return L.class_assertion_loss(data, self.class_center, self.class_offset, self.ind_center, self.gamma, neg=neg) + return L.class_assertion_loss(data, self.ind_center, self.ind_offset, self.class_center, self.class_offset, self.gamma, neg=neg) def object_property_assertion_loss(self, data, neg=False): if self.ind_center is None: raise ValueError("The number of individuals must be specified to use this loss function.") - return L.object_property_assertion_loss(data, self.head_center, self.head_offset, self.tail_center, self.tail_offset, self.ind_center, self.bump_individuals, self.gamma, self.delta, self.reg_factor, neg=neg) + return L.object_property_assertion_loss(data, self.ind_center, self.ind_offset, self.head_center, self.head_offset, self.tail_center, self.tail_offset, self.bump_individuals, self.gamma, self.delta, neg=neg) def regularization_loss(self): - loss = L.reg_loss(self.bump, self.reg_factor) + loss = L.reg_loss(self.bump_classes, self.reg_factor) if self.bump_individuals is not None: loss += L.reg_loss(self.bump_individuals, self.reg_factor) return loss diff --git a/mowl/nn/el/elbe/losses.py b/mowl/nn/el/elbe/losses.py index 95bda200..2408215a 100644 --- a/mowl/nn/el/elbe/losses.py +++ b/mowl/nn/el/elbe/losses.py @@ -1,39 +1,41 @@ import torch as th import numpy as np +def box_inclusion_score(sub_center, sub_offset, super_center, super_offset, margin): + euc = th.abs(sub_center - super_center) + dst = th.reshape(th.linalg.norm(th.relu(euc + sub_offset - super_offset + margin), axis=1), [-1, 1]) + return dst -def class_assertion_loss(data, class_embed, class_offset, ind_embed, margin, neg=False): - c = class_embed(data[:, 0]) - off_c = th.abs(class_offset(data[:, 0])) - i = ind_embed(data[:, 1]) +def class_assertion_loss(data, ind_embed, ind_offset, class_embed, class_offset, margin, neg=False): + i = ind_embed(data[:, 0]) + off_i = th.abs(ind_offset(data[:, 0])) + c = class_embed(data[:, 1]) + off_c = th.abs(class_offset(data[:, 1])) + return box_inclusion_score(i, off_i, c, off_c, margin) + - euc = th.abs(c - i) - dst = th.reshape(th.linalg.norm(th.relu(euc - off_c + margin), axis=1), [-1, 1]) - return dst +def object_property_assertion_loss(data, ind_embed, ind_off_set, rel_embed, margin, neg=False): + if neg: + fn = gci2_score_neg + else: + fn = gci2_score -def object_property_assertion_loss(data, rel_embed, ind_embed, margin, neg=False): - subj = ind_embed(data[:, 0]) + ind_1 = ind_embed(data[:, 0]) rel = rel_embed(data[:, 1]) - obj = ind_embed(data[:, 2]) - - euc = th.abs(subj + rel - obj) - dst = th.reshape(th.linalg.norm(th.relu(euc + margin), axis=1), [-1, 1]) - return dst - + ind_2 = ind_embed(data[:, 2]) - + off_i1 = th.abs(ind_off_set(data[:, 0])) + off_i2 = th.abs(ind_off_set(data[:, 2])) + + return fn(ind_1, off_i1, ind_2, off_i2, rel, margin) + def gci0_loss(data, class_embed, class_offset, margin, neg=False): c = class_embed(data[:, 0]) d = class_embed(data[:, 1]) - off_c = th.abs(class_offset(data[:, 0])) off_d = th.abs(class_offset(data[:, 1])) - - euc = th.abs(c - d) - dst = th.reshape(th.linalg.norm(th.relu(euc + off_c - off_d + margin), axis=1), [-1, 1]) - - return dst - + return box_inclusion_score(c, off_c, d, off_d, margin) + def gci0_bot_loss(data, class_offset, neg=False): off_c = th.abs(class_offset(data[:, 0])) loss = th.linalg.norm(off_c, axis=1) @@ -71,38 +73,31 @@ def gci1_bot_loss(data, class_embed, class_offset, margin, neg=False): dst = th.reshape(th.linalg.norm(th.relu(-euc + off_c + off_d + margin), axis=1), [-1, 1]) return dst +def gci2_score(c, off_c, d, off_d, rel, margin): + euc = th.abs(c + rel - d) + dst = th.reshape(th.linalg.norm(th.relu(euc + off_c - off_d + margin), axis=1), [-1, 1]) + return dst + +def gci2_score_neg(c, off_c, d, off_d, rel, margin): + euc = th.abs(c + rel - d) + dst = th.reshape(th.linalg.norm(th.relu(euc - off_c - off_d - margin), axis=1), [-1, 1]) + return dst + def gci2_loss(data, class_embed, class_offset, rel_embed, margin, neg=False): if neg: - return gci2_loss_neg(data, class_embed, class_offset, rel_embed, margin) + fn = gci2_score_neg else: - c = class_embed(data[:, 0]) - r = rel_embed(data[:, 1]) - d = class_embed(data[:, 2]) + fn = gci2_score - off_c = th.abs(class_offset(data[:, 0])) - off_d = th.abs(class_offset(data[:, 2])) - - euc = th.abs(c + r - d) - dst = th.reshape(th.linalg.norm(th.relu(euc + off_c - off_d + margin), axis=1), [-1, 1]) - return dst - -def gci2_score(*args, **kwargs): - return gci2_loss(*args, **kwargs) - -def gci2_loss_neg(data, class_embed, class_offset, rel_embed, margin): c = class_embed(data[:, 0]) r = rel_embed(data[:, 1]) d = class_embed(data[:, 2]) off_c = th.abs(class_offset(data[:, 0])) off_d = th.abs(class_offset(data[:, 2])) - - euc = th.abs(c + r - d) - dst = th.reshape(th.linalg.norm(th.relu(euc - off_c - off_d - margin), axis=1), [-1, 1]) - return dst - - + return fn(c, off_c, d, off_d, r, margin) + def gci3_loss(data, class_embed, class_offset, rel_embed, margin, neg=False): r = rel_embed(data[:, 0]) c = class_embed(data[:, 1]) diff --git a/mowl/nn/el/elbe/module.py b/mowl/nn/el/elbe/module.py index b88d3733..2e9bd2bb 100644 --- a/mowl/nn/el/elbe/module.py +++ b/mowl/nn/el/elbe/module.py @@ -2,12 +2,9 @@ from mowl.nn import ELModule import torch as th import torch.nn as nn -from deprecated.sphinx import deprecated - -@deprecated(version="1.0.0", reason="Use ELBEModule instead") -class ELBoxModule(ELModule): - """Implementation of ELBoxEmbeddings from [peng2020]_. +class ELBEModule(ELModule): + """Implementation of ELBE from [peng2020]_. """ def __init__(self, nb_ont_classes, nb_rels, nb_inds=None, embed_dim=50, margin=0.1): super().__init__() @@ -40,9 +37,14 @@ def __init__(self, nb_ont_classes, nb_rels, nb_inds=None, embed_dim=50, margin=0 nn.init.uniform_(self.ind_embed.weight, a=-1, b=1) weight_data_normalized = th.linalg.norm(self.ind_embed.weight.data, axis=1).reshape(-1, 1) self.ind_embed.weight.data /= weight_data_normalized + + self.ind_offset = nn.Embedding(self.nb_inds, embed_dim) + nn.init.uniform_(self.ind_offset.weight, a=-1, b=1) + weight_data_normalized = th.linalg.norm(self.ind_offset.weight.data, axis=1).reshape(-1, 1) + self.ind_offset.weight.data /= weight_data_normalized else: self.ind_embed = None - + self.ind_offset = None self.margin = margin @@ -76,13 +78,9 @@ def gci3_bot_loss(self, data, neg=False): def class_assertion_loss(self, data, neg=False): if self.ind_embed is None: raise ValueError("The number of individuals must be specified to use this loss function.") - return L.class_assertion_loss(data, self.class_embed, self.class_offset, self.ind_embed, self.margin, neg=neg) + return L.class_assertion_loss(data, self.ind_embed, self.ind_offset, self.class_embed, self.class_offset, self.margin, neg=neg) def object_property_assertion_loss(self, data, neg=False): if self.ind_embed is None: raise ValueError("The number of individuals must be specified to use this loss function.") - return L.object_property_assertion_loss(data, self.rel_embed, self.ind_embed, self.margin, neg=neg) - -class ELBEModule(ELBoxModule): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + return L.object_property_assertion_loss(data, self.ind_embed, self.ind_offset, self.rel_embed, self.margin, neg=neg) diff --git a/mowl/nn/el/elem/losses.py b/mowl/nn/el/elem/losses.py index 80922f3c..e658d451 100644 --- a/mowl/nn/el/elem/losses.py +++ b/mowl/nn/el/elem/losses.py @@ -2,35 +2,41 @@ import numpy as np -def class_assertion_loss(data, class_embed, class_rad, ind_embed, margin, neg=False): - c = class_embed(data[:, 0]) - rc = th.abs(class_rad(data[:, 0])) - i = ind_embed(data[:, 1]) +def ball_inclusion_score(sub_center, sub_rad, super_center, super_rad, margin): + dist = th.linalg.norm(sub_center - super_center, dim=1, keepdim=True) + sub_rad - super_rad + score = th.relu(dist - margin) + return score - dist = th.linalg.norm(c - i, dim=1, keepdim=True) - rc - loss = th.relu(dist - margin) - return loss +def class_assertion_loss(data, ind_embed, ind_rad, class_embed, class_rad, margin, neg=False): + i = ind_embed(data[:, 0]) + ri = th.abs(ind_rad(data[:, 0])) + c = class_embed(data[:, 1]) + rc = th.abs(class_rad(data[:, 1])) + return ball_inclusion_score(i, ri, c, rc, margin) + +def object_property_assertion_loss(data, ind_embed, ind_rad, rel_embed, margin, neg=False): + if neg: + fn = gci2_score_neg + else: + fn = gci2_score -def object_property_assertion_loss(data, rel_embed, ind_embed, margin, neg=False): - # C subClassOf R some D - subj = ind_embed(data[:, 0]) + ind_1 = ind_embed(data[:, 0]) rel = rel_embed(data[:, 1]) - obj = ind_embed(data[:, 2]) - - dst = th.linalg.norm(subj + rel - obj, dim=1, keepdim=True) - score = th.relu(dst - margin) + 10e-6 - return score + ind_2 = ind_embed(data[:, 2]) + rad_i1 = th.abs(ind_rad(data[:, 0])) + rad_i2 = th.abs(ind_rad(data[:, 2])) + + return fn(ind_1, rad_i1, ind_2, rad_i2, rel, margin) + def gci0_loss(data, class_embed, class_rad, margin, neg=False): c = class_embed(data[:, 0]) d = class_embed(data[:, 1]) rc = th.abs(class_rad(data[:, 0])) rd = th.abs(class_rad(data[:, 1])) - dist = th.linalg.norm(c - d, dim=1, keepdim=True) + rc - rd - loss = th.relu(dist - margin) - return loss - + return ball_inclusion_score(c, rc, d, rd, margin) + def gci0_bot_loss(data, class_rad, neg=False): rc = class_rad(data[:, 0]) return rc @@ -64,44 +70,35 @@ def gci1_bot_loss(data, class_embed, class_rad, margin, neg=False): return th.relu(sr - dst + margin) -def gci2_score(data, class_embed, class_rad, rel_embed, margin): +def gci2_score(c, rad_c, d, rad_d, rel, margin): # C subClassOf R some D - c = class_embed(data[:, 0]) - rE = rel_embed(data[:, 1]) - d = class_embed(data[:, 2]) + + dst = th.linalg.norm(c + rel - d, dim=1, keepdim=True) + score = th.relu(dst + rad_c - rad_d - margin) + 10e-6 + return score + +def gci2_score_neg(c, rad_c, d, rad_d, rel, margin): + dst = th.linalg.norm(c + rel - d, dim=1, keepdim=True) + loss = th.relu(rad_c + rad_d - dst + margin) + return loss - rc = th.abs(class_rad(data[:, 0])) - rd = th.abs(class_rad(data[:, 2])) - # c should intersect with d + r - dst = th.linalg.norm(c + rE - d, dim=1, keepdim=True) - score = th.relu(dst + rc - rd - margin) + 10e-6 - return score - def gci2_loss(data, class_embed, class_rad, rel_embed, margin, neg=False): if neg: - return gci2_loss_neg(data, class_embed, class_rad, rel_embed, margin) - + fn = gci2_score_neg else: - score = gci2_score(data, class_embed, class_rad, rel_embed, margin) - return score - + fn = gci2_score -def gci2_loss_neg(data, class_embed, class_rad, rel_embed, margin): - # C subClassOf R some D c = class_embed(data[:, 0]) - rE = rel_embed(data[:, 1]) + rel = rel_embed(data[:, 1]) d = class_embed(data[:, 2]) rc = th.abs(class_rad(data[:, 0])) rd = th.abs(class_rad(data[:, 2])) - # c should intersect with d + r - - dst = th.linalg.norm(c + rE - d, dim=1, keepdim=True) - loss = th.relu(rc + rd - dst + margin) - return loss + score = fn(c, rc, d, rd, rel, margin) + return score def gci3_loss(data, class_embed, class_rad, rel_embed, margin, neg=False): # R some C subClassOf D @@ -120,8 +117,9 @@ def gci3_bot_loss(data, class_rad, neg=False): return rc -def regularization_loss(class_embed, reg_factor): - res = th.abs(th.linalg.norm(class_embed.weight, axis=1) - reg_factor).mean() - # res = th.reshape(res, [-1, 1]) - return res +def regularization_loss(class_embed, ind_embed = None, reg_norm = 1): + reg = th.abs(th.linalg.norm(class_embed.weight, axis=1) - reg_norm).mean() + if ind_embed is not None: + reg += th.abs(th.linalg.norm(ind_embed.weight, axis=1) - reg_norm).mean() + return reg diff --git a/mowl/nn/el/elem/module.py b/mowl/nn/el/elem/module.py index 3c0be090..7cffc368 100644 --- a/mowl/nn/el/elem/module.py +++ b/mowl/nn/el/elem/module.py @@ -12,14 +12,18 @@ class ELEmModule(ELModule): """ - def __init__(self, nb_ont_classes, nb_rels, nb_inds=None, embed_dim=50, margin=0.1, reg_norm=1): + def __init__(self, nb_ont_classes, nb_rels, nb_inds, embed_dim=50, margin=0.1, reg_norm=1): super().__init__() self.nb_ont_classes = nb_ont_classes self.nb_rels = nb_rels self.nb_inds = nb_inds + if self.nb_inds == 0: + self.nb_inds = None + self.reg_norm = reg_norm self.embed_dim = embed_dim + self.class_embed = nn.Embedding(self.nb_ont_classes, embed_dim) nn.init.uniform_(self.class_embed.weight, a=-1, b=1) @@ -44,10 +48,14 @@ def __init__(self, nb_ont_classes, nb_rels, nb_inds=None, embed_dim=50, margin=0 nn.init.uniform_(self.ind_embed.weight, a=-1, b=1) weight_data_normalized = th.linalg.norm(self.ind_embed.weight.data, axis=1).reshape(-1, 1) self.ind_embed.weight.data /= weight_data_normalized + + self.ind_rad = nn.Embedding(self.nb_inds, 1) + nn.init.uniform_(self.ind_rad.weight, a=-1, b=1) + else: self.ind_embed = None + self.ind_rad = None - self.margin = margin def gci0_loss(self, data, neg=False): @@ -83,13 +91,13 @@ def gci2_score(self, data): def class_assertion_loss(self, data, neg=False): if self.ind_embed is None: raise ValueError("The number of individuals must be specified to use this loss function.") - return L.class_assertion_loss(data, self.class_embed, self.class_rad, self.ind_embed, self.margin, neg=neg) + return L.class_assertion_loss(data, self.ind_embed, self.ind_rad, self.class_embed, self.class_rad, self.margin, neg=neg) def object_property_assertion_loss(self, data, neg=False): if self.ind_embed is None: raise ValueError("The number of individuals must be specified to use this loss function.") - return L.object_property_assertion_loss(data, self.rel_embed, self.ind_embed, self.margin, neg=neg) + return L.object_property_assertion_loss(data, self.ind_embed, self.ind_rad, self.rel_embed, self.margin, neg=neg) def regularization_loss(self): - return L.regularization_loss(self.class_embed, self.reg_norm) + return L.regularization_loss(self.class_embed, self.ind_embed, self.reg_norm) diff --git a/setup.py b/setup.py index 9d9db099..fa42c0ae 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name="mowl-borg", - version="1.0.2-dev.4" + version="1.0.2-dev.4", author="Bio-Ontology Research Group", author_email="fernando.zhapacamacho@kaust.edu.sa", description="mOWL: A machine learning library with ontologies", diff --git a/tests/base_models/test_elmodel.py b/tests/base_models/test_elmodel.py index 2bf0156e..8f896eed 100644 --- a/tests/base_models/test_elmodel.py +++ b/tests/base_models/test_elmodel.py @@ -61,8 +61,7 @@ def test_class_attribute_training_dataset(self): model = EmbeddingELModel(self.family_dataset, 1, False) training_datasets = model.training_datasets - training_datasets = model.training_datasets # this is a hack to get 100% coverage - + self.assertTrue(isinstance(training_datasets, dict)) idx = random.randrange(0, len(training_datasets)) @@ -104,8 +103,7 @@ def test_class_attribute_training_dataloaders(self): model = EmbeddingELModel(self.ppi_dataset, 1, 1, False) training_dataloaders = model.training_dataloaders - training_dataloaders = model.training_dataloaders # this is a hack to get 100% coverage - + self.assertTrue(isinstance(training_dataloaders, dict)) idx = random.randrange(0, len(training_dataloaders)) random_item = list(training_dataloaders.items())[idx] @@ -191,33 +189,29 @@ def test_accessing_embeddings_attributes(self): num_relations = len(model.dataset.object_properties) num_individuals = len(model.dataset.individuals) - if num_classes > 0: - class_embeddings = model.class_embeddings - self.assertIsInstance(class_embeddings, dict) - self.assertTrue(len(class_embeddings) == num_classes) - for key, value in class_embeddings.items(): - with self.subTest(key=key): - self.assertIsInstance(key, str) - self.assertIsInstance(value, np.ndarray) - self.assertEqual(value.shape, (embed_dim,)) - - if num_relations > 0: - object_property_embeddings = model.object_property_embeddings - self.assertIsInstance(object_property_embeddings, dict) - self.assertTrue(len(object_property_embeddings) == num_relations) - for key, value in object_property_embeddings.items(): - with self.subTest(key=key): - self.assertIsInstance(key, str) - self.assertIsInstance(value, np.ndarray) - self.assertEqual(value.shape, (embed_dim,)) - - - if num_individuals > 0: - individual_embeddings = model.individual_embeddings - self.assertIsInstance(individual_embeddings, dict) - self.assertTrue(len(individual_embeddings) == num_individuals) - for key, value in individual_embeddings.items(): - with self.subTest(key=key): - self.assertIsInstance(key, str) - self.assertIsInstance(value, np.ndarray) - self.assertEqual(value.shape, (embed_dim,)) + class_embeddings = model.class_embeddings + self.assertIsInstance(class_embeddings, dict) + self.assertTrue(len(class_embeddings) == num_classes) + for key, value in class_embeddings.items(): + with self.subTest(key=key): + self.assertIsInstance(key, str) + self.assertIsInstance(value, np.ndarray) + self.assertEqual(value.shape, (embed_dim,)) + + object_property_embeddings = model.object_property_embeddings + self.assertIsInstance(object_property_embeddings, dict) + self.assertTrue(len(object_property_embeddings) == num_relations) + for key, value in object_property_embeddings.items(): + with self.subTest(key=key): + self.assertIsInstance(key, str) + self.assertIsInstance(value, np.ndarray) + self.assertEqual(value.shape, (embed_dim,)) + + individual_embeddings = model.individual_embeddings + self.assertIsInstance(individual_embeddings, dict) + self.assertTrue(len(individual_embeddings) == num_individuals) + for key, value in individual_embeddings.items(): + with self.subTest(key=key): + self.assertIsInstance(key, str) + self.assertIsInstance(value, np.ndarray) + self.assertEqual(value.shape, (embed_dim,)) diff --git a/tests/nn/test_boxsquaredel_module.py b/tests/nn/test_boxsquaredel_module.py index 2660cc2c..a9dd7b06 100644 --- a/tests/nn/test_boxsquaredel_module.py +++ b/tests/nn/test_boxsquaredel_module.py @@ -11,7 +11,8 @@ def setUpClass(self): ds = FamilyDataset() nb_classes = len(ds.classes) nb_rels = len(ds.object_properties) - self.module = BoxSquaredELModule(nb_classes, nb_rels) + nb_inds = len(ds.individuals) + self.module = BoxSquaredELModule(nb_classes, nb_rels, nb_inds) self.axioms = ELAxioms() def test_gci_0(self): diff --git a/tests/nn/test_elbox_module.py b/tests/nn/test_elbe_module.py similarity index 92% rename from tests/nn/test_elbox_module.py rename to tests/nn/test_elbe_module.py index 602c6866..b87e16ca 100644 --- a/tests/nn/test_elbox_module.py +++ b/tests/nn/test_elbe_module.py @@ -1,17 +1,18 @@ from tests.nn.fixtures import ELAxioms from unittest import TestCase -from mowl.nn import ELBoxModule +from mowl.nn import ELBEModule from tests.datasetFactory import FamilyDataset import torch as th -class TestELBoxModule(TestCase): +class TestELBEModule(TestCase): @classmethod def setUpClass(self): ds = FamilyDataset() nb_classes = len(ds.classes) nb_relations = len(ds.object_properties) - self.module = ELBoxModule(nb_classes, nb_relations) + nb_individuals = len(ds.individuals) + self.module = ELBEModule(nb_classes, nb_relations, nb_individuals) self.axioms = ELAxioms() def test_gci_0(self): diff --git a/tests/nn/test_elem_module.py b/tests/nn/test_elem_module.py index 73597d29..368a1b4c 100644 --- a/tests/nn/test_elem_module.py +++ b/tests/nn/test_elem_module.py @@ -11,7 +11,8 @@ def setUpClass(self): ds = FamilyDataset() nb_classes = len(ds.classes) nb_relations = len(ds.object_properties) - self.module = ELEmModule(nb_classes, nb_relations) + nb_individuals = len(ds.individuals) + self.module = ELEmModule(nb_classes, nb_relations, nb_individuals) self.axioms = ELAxioms() def test_gci_0(self):