Skip to content

Commit

Permalink
Merge pull request #67 from bio-ontology-research-group/dependabot/pip/pykeen-1.10.2
Browse files Browse the repository at this point in the history

⬆️ Bump pykeen from 1.10.1 to 1.10.2 and update from develop branch
  • Loading branch information
ferzcam authored Mar 20, 2024
2 parents cb88378 + 24a421e commit 9f8e8be
Show file tree
Hide file tree
Showing 21 changed files with 474 additions and 236 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Deprecated
### Removed
### Fixed
- Fix bug in GCI2 score for ELEmbeddings
- Fix bottleneck in ELBE example for PPI.
- Fix bugs in BoxSquaredEL model.

### Security

## [0.3.0]
Expand Down
45 changes: 44 additions & 1 deletion mowl/base_models/elmodel.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from mowl.ontology.normalize import ELNormalizer
from mowl.base_models.model import Model
from mowl.datasets.el import ELDataset
from mowl.projection import projector_factory
import torch as th
from torch.utils.data import DataLoader, default_collate
from mowl.datasets.el import ELDataset

from deprecated.sphinx import versionadded

from org.semanticweb.owlapi.model import OWLClassExpression, OWLClass, OWLObjectSomeValuesFrom, OWLObjectIntersectionOf
Expand Down Expand Up @@ -48,6 +50,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
self._validation_datasets = None
self._testing_datasets = None

self._loaded_eval = False

def init_module(self):
raise NotImplementedError
Expand Down Expand Up @@ -379,3 +382,43 @@ def from_pretrained(self, model):
#self._kge_method = kge_method




def load_pairwise_eval_data(self):
    """Lazily build the pairwise-evaluation data for this model.

    On first call, projects the training ontology and the testing ontology
    into edge sets restricted to the dataset's evaluation property, and
    records the head/tail entity names. Subsequent calls are no-ops.
    """
    if self._loaded_eval:
        return

    relation = self.dataset.get_evaluation_property()
    heads, tails = self.dataset.evaluation_classes
    self._head_entities = heads.as_str
    self._tail_entities = tails.as_str

    # Project subclass axioms over the evaluation relation only.
    projector = projector_factory('taxonomy_rels', taxonomy=False,
                                  relations=[relation])
    self._training_set = projector.project(self.dataset.ontology)
    self._testing_set = projector.project(self.dataset.testing)

    self._loaded_eval = True

@property
def training_set(self):
    """Edges projected from the training ontology (loaded lazily)."""
    self.load_pairwise_eval_data()
    return self._training_set

@property
def testing_set(self):
    """Edges projected from the testing ontology (loaded lazily)."""
    self.load_pairwise_eval_data()
    return self._testing_set

@property
def head_entities(self):
    """Names (str) of the evaluation head classes (loaded lazily)."""
    self.load_pairwise_eval_data()
    return self._head_entities

@property
def tail_entities(self):
    """Names (str) of the evaluation tail classes (loaded lazily)."""
    self.load_pairwise_eval_data()
    return self._tail_entities
3 changes: 2 additions & 1 deletion mowl/evaluation/rank_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def load_training_scores(self):
c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

self.training_scores[c, d] = 1000000
self.training_scores[c, d] = 10000

logging.info("Training scores created")
self._loaded_tr_scores = True
Expand Down Expand Up @@ -231,6 +231,7 @@ def activation(x):
print(f'Hits@100: {top100:.2f} Filtered: {ftop100:.2f}')
print(f'MR: {mean_rank:.2f} Filtered: {fmean_rank:.2f}')
print(f'AUC: {rank_auc:.2f} Filtered: {frank_auc:.2f}')
print(f"Tail entities: {num_tail_entities}")

self.metrics = {
"hits@1": top1,
Expand Down
2 changes: 2 additions & 0 deletions mowl/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from mowl.models.elboxembeddings.examples.model_ppi import ELBoxPPI
from mowl.models.elboxembeddings.examples.model_gda import ELBoxGDA

from mowl.models.boxsquaredel.model import BoxSquaredEL

from mowl.models.graph_random_walk.random_walk_w2v_model import RandomWalkPlusW2VModel
from mowl.models.graph_kge.graph_pykeen_model import GraphPlusPyKEENModel
from mowl.models.syntactic.w2v_model import SyntacticPlusW2VModel
Empty file.
130 changes: 130 additions & 0 deletions mowl/models/boxsquaredel/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from mowl.evaluation.base import AxiomsRankBasedEvaluator
from mowl.projection.factory import projector_factory
from mowl.projection.edge import Edge
import logging
import numpy as np
from scipy.stats import rankdata
import torch as th


class BoxSquaredELPPIEvaluator(AxiomsRankBasedEvaluator):
    """Rank-based evaluator for protein-protein interaction axioms scored by
    a BoxSquaredEL model.

    Axioms are projected into (head, relation, tail) edges; each test edge is
    ranked by scoring the head against every candidate tail entity with
    ``eval_method``. Filtered ranks down-weight pairs already seen in the
    axioms used for filtering.
    """

    def __init__(
        self,
        axioms,
        eval_method,
        axioms_to_filter,
        class_name_indexemb,
        rel_name_indexemb,
        device="cpu",
        verbose=False
    ):
        """
        :param axioms: axioms to evaluate; projected to edges by ``_init_axioms``.
        :param eval_method: callable scoring a batch of (head, rel, tail) index triples.
        :param axioms_to_filter: axioms whose pairs are masked out in filtered metrics.
        :param class_name_indexemb: dict mapping class names to embedding indices.
        :param rel_name_indexemb: dict mapping relation names to embedding indices.
        :param device: torch device used for scoring.
        :param verbose: verbosity flag forwarded to the base evaluator.
        """
        super().__init__(axioms, eval_method, axioms_to_filter, device, verbose)

        self.class_name_indexemb = class_name_indexemb
        self.relation_name_indexemb = rel_name_indexemb

        # Lazy-initialization flags for the cached evaluation structures.
        self._loaded_training_scores = False
        self._loaded_eval_data = False
        self._loaded_ht_data = False

    def _load_head_tail_entities(self):
        """Collect head/tail entity sets and their index mappings (idempotent)."""
        if self._loaded_ht_data:
            return

        ents, _ = Edge.getEntitiesAndRelations(self.axioms)
        ents_filter, _ = Edge.getEntitiesAndRelations(self.axioms_to_filter)

        entities = list(set(ents) | set(ents_filter))

        # NOTE(review): head and tail sets are filtered against the same
        # dictionary, so they end up identical; preserved as-is.
        self.head_entities = set()
        for e in entities:
            if e in self.class_name_indexemb:
                self.head_entities.add(e)
            else:
                logging.info("Entity %s not present in the embeddings dictionary. Ignoring it.", e)

        self.tail_entities = set()
        for e in entities:
            if e in self.class_name_indexemb:
                self.tail_entities.add(e)
            else:
                logging.info("Entity %s not present in the embeddings dictionary. Ignoring it.", e)

        # name -> embedding index, restricted to the kept entities.
        self.head_name_indexemb = {k: self.class_name_indexemb[k] for k in self.head_entities}
        self.tail_name_indexemb = {k: self.class_name_indexemb[k] for k in self.tail_entities}

        # embedding index -> row/column position in the scores matrix.
        self.head_indexemb_indexsc = {v: k for k, v in enumerate(self.head_name_indexemb.values())}
        self.tail_indexemb_indexsc = {v: k for k, v in enumerate(self.tail_name_indexemb.values())}

        self._loaded_ht_data = True

    def _load_training_scores(self):
        """Build the (heads x tails) filtering matrix: 1 everywhere, 10000 on
        pairs present in ``axioms_to_filter`` (used to push known pairs to the
        bottom of filtered rankings).

        Returns the matrix; the caller caches it on ``self.training_scores``
        (``compute_axiom_rank`` assigns it before any second call relies on it).
        """
        if self._loaded_training_scores:
            return self.training_scores

        self._load_head_tail_entities()

        training_scores = np.ones((len(self.head_entities), len(self.tail_entities)),
                                  dtype=np.int32)

        if self._compute_filtered_metrics:
            # careful here: c must be in head entities and d must be in tail entities
            for axiom in self.axioms_to_filter:
                c, _, d = axiom.astuple()
                if (c not in self.head_entities) or (d not in self.tail_entities):
                    continue

                c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
                c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

                training_scores[c, d] = 10000

        logging.info("Training scores created")

        self._loaded_training_scores = True
        return training_scores

    def _init_axioms(self, axioms):
        """Project ontology axioms to interaction edges; None passes through."""
        if axioms is None:
            return None

        projector = projector_factory("taxonomy_rels", relations=["http://interacts_with"])

        edges = projector.project(axioms)
        return edges  # List of Edges

    def compute_axiom_rank(self, axiom):
        """Return (rank, filtered_rank, num_candidates) for one edge, or
        (None, None, None) when either endpoint lacks an embedding."""
        self.training_scores = self._load_training_scores()

        c, r, d = axiom.astuple()

        if (c not in self.head_entities) or (d not in self.tail_entities):
            return None, None, None

        # Embedding indices
        c_emb_idx, d_emb_idx = self.head_name_indexemb[c], self.tail_name_indexemb[d]

        # Scores matrix indices. BUGFIX: the original split this assignment
        # across two statements, so the RHS became a 1-tuple and unpacking
        # into two names raised ValueError at runtime.
        c_sc_idx = self.head_indexemb_indexsc[c_emb_idx]
        d_sc_idx = self.tail_indexemb_indexsc[d_emb_idx]

        r = self.relation_name_indexemb[r]

        # Score the head against every candidate tail (set iteration order is
        # stable within a run, matching the index maps built from the same set).
        data = th.tensor([
            [c_emb_idx, r, self.tail_name_indexemb[x]] for x in
            self.tail_entities]).to(self.device)

        res = self.eval_method(data).squeeze().cpu().detach().numpy()

        index = rankdata(res, method='average')
        rank = index[d_sc_idx]

        # Filtered rank: known training pairs are multiplied by 10000 so they
        # fall to the end of the ordering.
        findex = rankdata((res * self.training_scores[c_sc_idx, :]), method='average')
        frank = findex[d_sc_idx]

        return rank, frank, len(self.tail_entities)
77 changes: 77 additions & 0 deletions mowl/models/boxsquaredel/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

from mowl.nn import BoxSquaredELModule
from mowl.base_models.elmodel import EmbeddingELModel
from mowl.models.boxsquaredel.evaluate import BoxSquaredELPPIEvaluator
import torch as th
from torch import nn


class BoxSquaredEL(EmbeddingELModel):
    """
    Implementation based on [peng2020]_.

    EL embedding model whose scoring module is :class:`BoxSquaredELModule`.
    Training is left to subclasses/examples (:meth:`train` is abstract here).
    """

    def __init__(self,
                 dataset,
                 embed_dim=50,
                 margin=0.02,
                 reg_norm=1,
                 learning_rate=0.001,
                 epochs=1000,
                 batch_size=4096 * 8,
                 delta=2.5,
                 reg_factor=0.2,
                 num_negs=4,
                 model_filepath=None,
                 device='cpu'
                 ):
        """
        :param dataset: EL dataset providing class/property dictionaries.
        :param embed_dim: embedding dimensionality.
        :param margin: margin passed to the module as ``gamma``.
        :param reg_norm: regularization norm (stored; not used by the module here).
        :param learning_rate: optimizer learning rate for training loops.
        :param epochs: number of training epochs.
        :param batch_size: batch size forwarded to the base model.
        :param delta: ``delta`` hyperparameter of BoxSquaredEL.
        :param reg_factor: regularization factor of BoxSquaredEL.
        :param num_negs: number of negative samples per positive.
        :param model_filepath: checkpoint path for saving/loading the module.
        :param device: torch device the module is placed on.
        """
        super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath)

        self.margin = margin
        self.reg_norm = reg_norm
        self.delta = delta
        self.reg_factor = reg_factor
        self.num_negs = num_negs
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.device = device
        self._loaded = False
        # NOTE(review): this overrides the extended=True passed to the base
        # class above; looks deliberate but worth confirming against the
        # base-class semantics of `extended`.
        self.extended = False
        self.init_module()

    def init_module(self):
        """(Re)create the scoring module on ``self.device``."""
        self.module = BoxSquaredELModule(
            len(self.class_index_dict),
            len(self.object_property_index_dict),
            embed_dim=self.embed_dim,
            gamma=self.margin,
            delta=self.delta,
            reg_factor=self.reg_factor

        ).to(self.device)

    def train(self):
        """Training is not implemented on the base model; see examples."""
        raise NotImplementedError

    def eval_method(self, data):
        """Score (head, relation, tail) index triples with the GCI2 scorer."""
        return self.module.gci2_score(data)

    def get_embeddings(self):
        """Load the best checkpoint and return ``(class_embeddings,
        relation_embeddings)`` as name-keyed dicts of numpy arrays."""
        print('Load the best model', self.model_filepath)
        # load_best_model() re-initializes the module itself, so the extra
        # init_module() call the original made here was redundant work.
        self.load_best_model()

        ent_embeds = {k: v for k, v in zip(self.class_index_dict.keys(),
                                           self.module.class_embed.weight.cpu().detach().numpy())}
        rel_embeds = {k: v for k, v in zip(self.object_property_index_dict.keys(),
                                           self.module.rel_embed.weight.cpu().detach().numpy())}
        return ent_embeds, rel_embeds

    def load_best_model(self):
        """Re-create the module, load weights from ``model_filepath`` and
        switch to eval mode."""
        self.init_module()
        self.module.load_state_dict(th.load(self.model_filepath))
        self.module.eval()
40 changes: 21 additions & 19 deletions mowl/models/elboxembeddings/examples/model_ppi.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ELBoxPPI(ELBoxEmbeddings):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def train(self):
def train(self, validate_every=1000):
criterion = nn.MSELoss()
optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate)
best_loss = float('inf')
Expand All @@ -29,6 +29,9 @@ def train(self):
k: v.data for k, v in self.training_datasets.items()}
validation_dataset = self.validation_datasets["gci2"][:]

prots = [self.class_index_dict[p] for p
in self.dataset.evaluation_classes.as_str]

for epoch in trange(self.epochs):
self.module.train()

Expand All @@ -37,16 +40,13 @@ def train(self):
for gci_name, gci_dataset in training_datasets.items():
if len(gci_dataset) == 0:
continue
rand_index = np.random.choice(len(gci_dataset), size=512)
dst = self.module(gci_dataset[rand_index], gci_name)
dst = self.module(gci_dataset, gci_name)

mse_loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
loss += mse_loss

if gci_name == "gci2":
rand_index = np.random.choice(len(gci_dataset), size=512)
gci_batch = gci_dataset[rand_index]
prots = [self.class_index_dict[p] for p
in self.dataset.evaluation_classes.as_str]
gci_batch = gci_dataset
idxs_for_negs = np.random.choice(prots, size=len(gci_batch), replace=True)
rand_prot_ids = th.tensor(idxs_for_negs).to(self.device)
neg_data = th.cat([gci_batch[:, :2], rand_prot_ids.unsqueeze(1)], dim=1)
Expand All @@ -61,20 +61,22 @@ def train(self):
optimizer.step()
train_loss += loss.detach().item()

with th.no_grad():
self.module.eval()
valid_loss = 0
gci2_data = validation_dataset

dst = self.module(gci2_data, "gci2")
loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
valid_loss += loss.detach().item()
if (epoch + 1) % validate_every == 0:
with th.no_grad():
self.module.eval()
valid_loss = 0
gci2_data = validation_dataset

dst = self.module(gci2_data, "gci2")
loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
valid_loss += loss.detach().item()

if best_loss > valid_loss:
best_loss = valid_loss
print("Saving model..")
th.save(self.module.state_dict(), self.model_filepath)
print(f'Epoch {epoch}: Train loss: {train_loss} Valid loss: {valid_loss}')
if valid_loss < best_loss:
best_loss = valid_loss
print("Saving model..")
th.save(self.module.state_dict(), self.model_filepath)
print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}')

return 1

Expand Down
Loading

0 comments on commit 9f8e8be

Please sign in to comment.