Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⬆️ Bump pykeen from 1.10.1 to 1.10.2 #67

Merged
merged 11 commits into from
Mar 20, 2024
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Deprecated
### Removed
### Fixed
- Fix bug in GCI2 score for ELEmbeddings.
- Fix bottleneck in ELBE example for PPI.
- Fix bugs in BoxSquaredEL model.

### Security

## [0.3.0]
Expand Down
45 changes: 44 additions & 1 deletion mowl/base_models/elmodel.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from mowl.ontology.normalize import ELNormalizer
from mowl.base_models.model import Model
from mowl.datasets.el import ELDataset
from mowl.projection import projector_factory
import torch as th
from torch.utils.data import DataLoader, default_collate
from mowl.datasets.el import ELDataset

from deprecated.sphinx import versionadded

from org.semanticweb.owlapi.model import OWLClassExpression, OWLClass, OWLObjectSomeValuesFrom, OWLObjectIntersectionOf
Expand Down Expand Up @@ -48,6 +50,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
self._validation_datasets = None
self._testing_datasets = None

self._loaded_eval = False

def init_module(self):
raise NotImplementedError
Expand Down Expand Up @@ -379,3 +382,43 @@ def from_pretrained(self, model):
#self._kge_method = kge_method




def load_pairwise_eval_data(self):
    """Lazily materialize the pairwise-evaluation data.

    Populates the head/tail entity name lists and the projected
    training/testing edge sets from the dataset. Subsequent calls are
    no-ops thanks to the ``_loaded_eval`` flag.
    """
    if self._loaded_eval:
        return

    heads, tails = self.dataset.evaluation_classes
    self._head_entities = heads.as_str
    self._tail_entities = tails.as_str

    # Project the ontologies onto edges labelled with the single
    # evaluation property (e.g. interacts_with for PPI).
    relation = self.dataset.get_evaluation_property()
    projector = projector_factory('taxonomy_rels', taxonomy=False,
                                  relations=[relation])

    self._training_set = projector.project(self.dataset.ontology)
    self._testing_set = projector.project(self.dataset.testing)

    self._loaded_eval = True


@property
def training_set(self):
    """Projected training edges; built on first access."""
    self.load_pairwise_eval_data()
    edges = self._training_set
    return edges

@property
def testing_set(self):
    """Projected testing edges; built on first access."""
    self.load_pairwise_eval_data()
    edges = self._testing_set
    return edges

@property
def head_entities(self):
    """Evaluation head-class names; built on first access."""
    self.load_pairwise_eval_data()
    names = self._head_entities
    return names

@property
def tail_entities(self):
    """Evaluation tail-class names; built on first access."""
    self.load_pairwise_eval_data()
    names = self._tail_entities
    return names
3 changes: 2 additions & 1 deletion mowl/evaluation/rank_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def load_training_scores(self):
c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

self.training_scores[c, d] = 1000000
self.training_scores[c, d] = 10000

logging.info("Training scores created")
self._loaded_tr_scores = True
Expand Down Expand Up @@ -231,6 +231,7 @@ def activation(x):
print(f'Hits@100: {top100:.2f} Filtered: {ftop100:.2f}')
print(f'MR: {mean_rank:.2f} Filtered: {fmean_rank:.2f}')
print(f'AUC: {rank_auc:.2f} Filtered: {frank_auc:.2f}')
print(f"Tail entities: {num_tail_entities}")

self.metrics = {
"hits@1": top1,
Expand Down
2 changes: 2 additions & 0 deletions mowl/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from mowl.models.elboxembeddings.examples.model_ppi import ELBoxPPI
from mowl.models.elboxembeddings.examples.model_gda import ELBoxGDA

from mowl.models.boxsquaredel.model import BoxSquaredEL

from mowl.models.graph_random_walk.random_walk_w2v_model import RandomWalkPlusW2VModel
from mowl.models.graph_kge.graph_pykeen_model import GraphPlusPyKEENModel
from mowl.models.syntactic.w2v_model import SyntacticPlusW2VModel
Empty file.
130 changes: 130 additions & 0 deletions mowl/models/boxsquaredel/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from mowl.evaluation.base import AxiomsRankBasedEvaluator
from mowl.projection.factory import projector_factory
from mowl.projection.edge import Edge
import logging
import numpy as np
from scipy.stats import rankdata
import torch as th


class BoxSquaredELPPIEvaluator(AxiomsRankBasedEvaluator):
    """Rank-based evaluator of GCI2 (protein--protein interaction) axioms
    for the Box:math:`^2`EL model.

    For each test axiom ``c interacts_with d`` the evaluator scores every
    candidate tail entity and reports the (raw and filtered) rank of the
    true tail ``d``.
    """

    def __init__(
            self,
            axioms,
            eval_method,
            axioms_to_filter,
            class_name_indexemb,
            rel_name_indexemb,
            device="cpu",
            verbose=False
    ):
        """
        :param axioms: testing axioms (projected to edges by ``_init_axioms``)
        :param eval_method: callable scoring a ``(head, rel, tail)`` index tensor
        :param axioms_to_filter: training axioms used for filtered metrics
        :param class_name_indexemb: dict class name -> embedding index
        :param rel_name_indexemb: dict relation name -> embedding index
        :param device: torch device for scoring
        :param verbose: forwarded to the base evaluator
        """
        super().__init__(axioms, eval_method, axioms_to_filter, device, verbose)

        self.class_name_indexemb = class_name_indexemb
        self.relation_name_indexemb = rel_name_indexemb

        # Lazy-loading flags: each _load_* helper runs at most once.
        self._loaded_training_scores = False
        self._loaded_eval_data = False
        self._loaded_ht_data = False

    def _load_head_tail_entities(self):
        """Build head/tail entity sets and the index maps between entity
        names, embedding indices and score-matrix positions."""
        if self._loaded_ht_data:
            return

        ents, _ = Edge.getEntitiesAndRelations(self.axioms)
        ents_filter, _ = Edge.getEntitiesAndRelations(self.axioms_to_filter)

        entities = list(set(ents) | set(ents_filter))

        # Head and tail candidate sets are identical here: every entity
        # that has an embedding. Kept as two attributes to match the
        # evaluator API. (The original built them with two identical loops.)
        self.head_entities = set()
        self.tail_entities = set()
        for e in entities:
            if e in self.class_name_indexemb:
                self.head_entities.add(e)
                self.tail_entities.add(e)
            else:
                logging.info("Entity %s not present in the embeddings dictionary. Ignoring it.", e)

        # name -> embedding index
        self.head_name_indexemb = {k: self.class_name_indexemb[k] for k in self.head_entities}
        self.tail_name_indexemb = {k: self.class_name_indexemb[k] for k in self.tail_entities}

        # embedding index -> position in the score matrix
        self.head_indexemb_indexsc = {v: k for k, v in enumerate(self.head_name_indexemb.values())}
        self.tail_indexemb_indexsc = {v: k for k, v in enumerate(self.tail_name_indexemb.values())}

        self._loaded_ht_data = True

    def _load_training_scores(self):
        """Return the filtering matrix: 1 everywhere except known training
        pairs, which carry a large multiplier so they sink in the filtered
        ranking. Cached after the first call."""
        if self._loaded_training_scores:
            return self.training_scores

        self._load_head_tail_entities()

        training_scores = np.ones((len(self.head_entities), len(self.tail_entities)),
                                  dtype=np.int32)

        if self._compute_filtered_metrics:
            # careful here: c must be in head entities and d must be in tail entities
            for axiom in self.axioms_to_filter:
                c, _, d = axiom.astuple()
                if (c not in self.head_entities) or (d not in self.tail_entities):
                    continue

                c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
                c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

                training_scores[c, d] = 10000

        logging.info("Training scores created")

        # Cache on self so later calls don't rely on the caller having
        # assigned the return value (the original depended on that).
        self.training_scores = training_scores
        self._loaded_training_scores = True
        return training_scores

    def _init_axioms(self, axioms):
        """Project an ontology onto interacts_with edges; None passes through."""
        if axioms is None:
            return None

        projector = projector_factory("taxonomy_rels", relations=["http://interacts_with"])

        edges = projector.project(axioms)
        return edges  # List of Edges

    def compute_axiom_rank(self, axiom):
        """Return ``(rank, filtered_rank, num_candidates)`` for one axiom,
        or ``(None, None, None)`` when either class has no embedding."""
        self.training_scores = self._load_training_scores()

        c, r, d = axiom.astuple()

        if (c not in self.head_entities) or (d not in self.tail_entities):
            return None, None, None

        # Embedding indices
        c_emb_idx, d_emb_idx = self.head_name_indexemb[c], self.tail_name_indexemb[d]

        # Score-matrix indices. Bug fix: the original split this tuple
        # assignment across two statements with no line continuation,
        # which unpacked a 1-tuple into two names and raised at runtime.
        c_sc_idx = self.head_indexemb_indexsc[c_emb_idx]
        d_sc_idx = self.tail_indexemb_indexsc[d_emb_idx]

        r = self.relation_name_indexemb[r]

        # Score (c, r, x) against every candidate tail entity; iteration
        # order matches the order used to build tail_indexemb_indexsc.
        data = th.tensor([
            [c_emb_idx, r, t_emb_idx] for t_emb_idx in
            self.tail_name_indexemb.values()]).to(self.device)

        res = self.eval_method(data).squeeze().cpu().detach().numpy()

        index = rankdata(res, method='average')
        rank = index[d_sc_idx]

        # Filtered rank: known training pairs were boosted by 10000 so
        # they fall to the bottom of the ranking.
        findex = rankdata((res * self.training_scores[c_sc_idx, :]), method='average')
        frank = findex[d_sc_idx]

        return rank, frank, len(self.tail_entities)
77 changes: 77 additions & 0 deletions mowl/models/boxsquaredel/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

from mowl.nn import BoxSquaredELModule
from mowl.base_models.elmodel import EmbeddingELModel
from mowl.models.boxsquaredel.evaluate import BoxSquaredELPPIEvaluator
import torch as th
from torch import nn


class BoxSquaredEL(EmbeddingELModel):
    """Box:math:`^2`EL embedding model.

    Implementation based on [peng2020]_.
    """

    def __init__(self,
                 dataset,
                 embed_dim=50,
                 margin=0.02,
                 reg_norm=1,
                 learning_rate=0.001,
                 epochs=1000,
                 batch_size=4096 * 8,
                 delta=2.5,
                 reg_factor=0.2,
                 num_negs=4,
                 model_filepath=None,
                 device='cpu'
                 ):
        """
        :param dataset: EL dataset providing the axioms and class/property indices
        :param embed_dim: embedding dimensionality
        :param margin: margin (gamma) passed to the module
        :param reg_norm: regularization norm (stored; used by subclasses)
        :param learning_rate: optimizer learning rate
        :param epochs: number of training epochs
        :param batch_size: training batch size
        :param delta: delta hyperparameter of the Box²EL loss
        :param reg_factor: regularization factor of the Box²EL loss
        :param num_negs: number of negative samples per positive
        :param model_filepath: where the best model checkpoint is saved
        :param device: torch device
        """
        super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath)

        self.margin = margin
        self.reg_norm = reg_norm
        self.delta = delta
        self.reg_factor = reg_factor
        self.num_negs = num_negs
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.device = device
        self._loaded = False
        self.extended = False
        self.init_module()

    def init_module(self):
        """(Re)create the underlying Box²EL torch module on ``self.device``."""
        self.module = BoxSquaredELModule(
            len(self.class_index_dict),
            len(self.object_property_index_dict),
            embed_dim=self.embed_dim,
            gamma=self.margin,
            delta=self.delta,
            reg_factor=self.reg_factor
        ).to(self.device)

    def train(self):
        """Training is provided by task-specific subclasses."""
        raise NotImplementedError

    def eval_method(self, data):
        """Score (head, relation, tail) index triples with the GCI2 scorer."""
        return self.module.gci2_score(data)

    def get_embeddings(self):
        """Load the best checkpoint and return ``(class_embeddings,
        relation_embeddings)`` as name -> numpy-vector dicts.

        Bug fix: the original called ``init_module()`` here and then again
        inside ``load_best_model()``, building the module twice.
        """
        print('Load the best model', self.model_filepath)
        self.load_best_model()

        ent_embeds = {k: v for k, v in zip(self.class_index_dict.keys(),
                                           self.module.class_embed.weight.cpu().detach().numpy())}
        rel_embeds = {k: v for k, v in zip(self.object_property_index_dict.keys(),
                                           self.module.rel_embed.weight.cpu().detach().numpy())}
        return ent_embeds, rel_embeds

    def load_best_model(self):
        """Reinitialize the module and load the saved best checkpoint."""
        self.init_module()
        self.module.load_state_dict(th.load(self.model_filepath))
        self.module.eval()

40 changes: 21 additions & 19 deletions mowl/models/elboxembeddings/examples/model_ppi.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ELBoxPPI(ELBoxEmbeddings):
def __init__(self, *args, **kwargs):
    # Thin wrapper: all configuration is handled by the ELBoxEmbeddings
    # base class; this subclass only overrides training/evaluation.
    super().__init__(*args, **kwargs)

def train(self):
def train(self, validate_every=1000):
criterion = nn.MSELoss()
optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate)
best_loss = float('inf')
Expand All @@ -29,6 +29,9 @@ def train(self):
k: v.data for k, v in self.training_datasets.items()}
validation_dataset = self.validation_datasets["gci2"][:]

prots = [self.class_index_dict[p] for p
in self.dataset.evaluation_classes.as_str]

for epoch in trange(self.epochs):
self.module.train()

Expand All @@ -37,16 +40,13 @@ def train(self):
for gci_name, gci_dataset in training_datasets.items():
if len(gci_dataset) == 0:
continue
rand_index = np.random.choice(len(gci_dataset), size=512)
dst = self.module(gci_dataset[rand_index], gci_name)
dst = self.module(gci_dataset, gci_name)

mse_loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
loss += mse_loss

if gci_name == "gci2":
rand_index = np.random.choice(len(gci_dataset), size=512)
gci_batch = gci_dataset[rand_index]
prots = [self.class_index_dict[p] for p
in self.dataset.evaluation_classes.as_str]
gci_batch = gci_dataset
idxs_for_negs = np.random.choice(prots, size=len(gci_batch), replace=True)
rand_prot_ids = th.tensor(idxs_for_negs).to(self.device)
neg_data = th.cat([gci_batch[:, :2], rand_prot_ids.unsqueeze(1)], dim=1)
Expand All @@ -61,20 +61,22 @@ def train(self):
optimizer.step()
train_loss += loss.detach().item()

with th.no_grad():
self.module.eval()
valid_loss = 0
gci2_data = validation_dataset

dst = self.module(gci2_data, "gci2")
loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
valid_loss += loss.detach().item()
if (epoch + 1) % validate_every == 0:
with th.no_grad():
self.module.eval()
valid_loss = 0
gci2_data = validation_dataset

dst = self.module(gci2_data, "gci2")
loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
valid_loss += loss.detach().item()

if best_loss > valid_loss:
best_loss = valid_loss
print("Saving model..")
th.save(self.module.state_dict(), self.model_filepath)
print(f'Epoch {epoch}: Train loss: {train_loss} Valid loss: {valid_loss}')
if valid_loss < best_loss:
best_loss = valid_loss
print("Saving model..")
th.save(self.module.state_dict(), self.model_filepath)
print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}')

return 1

Expand Down
Loading
Loading