Skip to content

Commit d6dd4db

Browse files
authoredMar 20, 2024··
Merge pull request #68 from bio-ontology-research-group/box2el
EL Geometric Models
2 parents 0d95f06 + e8b57c8 commit d6dd4db

File tree

24 files changed

+476
-238
lines changed

24 files changed

+476
-238
lines changed
 

‎CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
### Deprecated
1212
### Removed
1313
### Fixed
14+
- Fix bug in GCI2 score for ELEmbeddings.
15+
- Fix bottleneck in ELBE example for PPI.
16+
- Fix bugs in BoxSquaredEL model.
17+
1418
### Security
1519

1620
## [0.3.0]

‎README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ git clone https://github.com/bio-ontology-research-group/mowl.git
4444
4545
cd mowl
4646
47-
conda env create -f envs/environment_3.8.yml
47+
conda env create -f envs/environment_3_8.yml
4848
conda activate mowl
4949
5050
./build_jars.sh

‎examples/elmodels/plot_1_elembeddings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
# ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
5656
#
5757
# .. math::
58-
# p_1 \sqsubseteq interacts\_with. p_2
58+
# p_1 \sqsubseteq \exists interacts\_with. p_2
5959
#
6060
# For that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELEmPPI` model, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.
6161

‎examples/elmodels/plot_2_elboxembeddings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
# ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
5151
#
5252
# .. math::
53-
# p_1 \sqsubseteq interacts\_with. p_2
53+
# p_1 \sqsubseteq \exists interacts\_with. p_2
5454
#
5555
# For that, we can use the class :class:`mowl.models.elboxembeddings.examples.model_ppi.ELBoxPPI` model, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.
5656

‎mowl/base_models/elmodel.py

+44-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from mowl.ontology.normalize import ELNormalizer
22
from mowl.base_models.model import Model
3+
from mowl.datasets.el import ELDataset
4+
from mowl.projection import projector_factory
35
import torch as th
46
from torch.utils.data import DataLoader, default_collate
5-
from mowl.datasets.el import ELDataset
7+
68
from deprecated.sphinx import versionadded
79

810
from org.semanticweb.owlapi.model import OWLClassExpression, OWLClass, OWLObjectSomeValuesFrom, OWLObjectIntersectionOf
@@ -48,6 +50,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
4850
self._validation_datasets = None
4951
self._testing_datasets = None
5052

53+
self._loaded_eval = False
5154

5255
def init_module(self):
5356
raise NotImplementedError
@@ -379,3 +382,43 @@ def from_pretrained(self, model):
379382
#self._kge_method = kge_method
380383

381384

385+
386+
387+
def load_pairwise_eval_data(self):
    """Populate the cached pairwise-evaluation data on first use.

    Reads the evaluation property and the (head, tail) evaluation classes
    from ``self.dataset``, stores the string forms of those classes, and
    projects ``self.dataset.ontology`` and ``self.dataset.testing`` with a
    ``taxonomy_rels`` projector restricted to the evaluation property.
    Later calls are no-ops because ``self._loaded_eval`` is set at the end.
    """
    if not self._loaded_eval:
        relation = self.dataset.get_evaluation_property()
        heads, tails = self.dataset.evaluation_classes
        self._head_entities = heads.as_str
        self._tail_entities = tails.as_str

        # Only edges of the evaluation relation are kept; taxonomy edges
        # are explicitly switched off.
        projector = projector_factory(
            'taxonomy_rels',
            taxonomy=False,
            relations=[relation],
        )

        self._training_set = projector.project(self.dataset.ontology)
        self._testing_set = projector.project(self.dataset.testing)

        self._loaded_eval = True
404+
405+
406+
@property
def training_set(self):
    """Projection of the training ontology, loaded lazily on first access."""
    self.load_pairwise_eval_data()
    projected = self._training_set
    return projected
410+
411+
@property
def testing_set(self):
    """Projection of the testing ontology, loaded lazily on first access."""
    self.load_pairwise_eval_data()
    projected = self._testing_set
    return projected
415+
416+
@property
def head_entities(self):
    """String names of the head evaluation classes, loaded lazily on first access."""
    self.load_pairwise_eval_data()
    heads = self._head_entities
    return heads
420+
421+
@property
def tail_entities(self):
    """String names of the tail evaluation classes, loaded lazily on first access."""
    self.load_pairwise_eval_data()
    tails = self._tail_entities
    return tails

‎mowl/evaluation/rank_based.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def load_training_scores(self):
125125
c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
126126
c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]
127127

128-
self.training_scores[c, d] = 1000000
128+
self.training_scores[c, d] = 10000
129129

130130
logging.info("Training scores created")
131131
self._loaded_tr_scores = True
@@ -231,6 +231,7 @@ def activation(x):
231231
print(f'Hits@100: {top100:.2f} Filtered: {ftop100:.2f}')
232232
print(f'MR: {mean_rank:.2f} Filtered: {fmean_rank:.2f}')
233233
print(f'AUC: {rank_auc:.2f} Filtered: {frank_auc:.2f}')
234+
print(f"Tail entities: {num_tail_entities}")
234235

235236
self.metrics = {
236237
"hits@1": top1,

‎mowl/models/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from mowl.models.elboxembeddings.examples.model_ppi import ELBoxPPI
77
from mowl.models.elboxembeddings.examples.model_gda import ELBoxGDA
88

9+
from mowl.models.boxsquaredel.model import BoxSquaredEL
10+
911
from mowl.models.graph_random_walk.random_walk_w2v_model import RandomWalkPlusW2VModel
1012
from mowl.models.graph_kge.graph_pykeen_model import GraphPlusPyKEENModel
1113
from mowl.models.syntactic.w2v_model import SyntacticPlusW2VModel

‎mowl/models/boxsquaredel/__init__.py

Whitespace-only changes.

‎mowl/models/boxsquaredel/evaluate.py

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
from mowl.evaluation.base import AxiomsRankBasedEvaluator
2+
from mowl.projection.factory import projector_factory
3+
from mowl.projection.edge import Edge
4+
import logging
5+
import numpy as np
6+
from scipy.stats import rankdata
7+
import torch as th
8+
9+
10+
class BoxSquaredELPPIEvaluator(AxiomsRankBasedEvaluator):
    """Rank-based evaluator for ``http://interacts_with`` edges.

    Axioms are projected to edges over the ``http://interacts_with``
    relation (see :meth:`_init_axioms`). For each edge ``(c, r, d)`` the
    evaluator scores ``(c, r, x)`` for every candidate tail ``x`` and
    reports the rank of ``d``.

    :param axioms: Axioms to evaluate; forwarded to the base class.
    :param eval_method: Callable that receives a tensor of
        ``[head_index, relation_index, tail_index]`` rows and returns one
        score per row. Lower scores receive better (smaller) ranks.
    :param axioms_to_filter: Axioms used to compute filtered ranks;
        forwarded to the base class.
    :param class_name_indexemb: Mapping from class name to embedding index.
    :param rel_name_indexemb: Mapping from relation name to embedding index.
    :param device: Device the candidate tensor is moved to before scoring.
    :param verbose: Forwarded to the base class.
    """

    def __init__(
        self,
        axioms,
        eval_method,
        axioms_to_filter,
        class_name_indexemb,
        rel_name_indexemb,
        device="cpu",
        verbose=False
    ):
        super().__init__(axioms, eval_method, axioms_to_filter, device, verbose)

        self.class_name_indexemb = class_name_indexemb
        self.relation_name_indexemb = rel_name_indexemb

        self._loaded_training_scores = False
        self._loaded_eval_data = False
        self._loaded_ht_data = False

    def _load_head_tail_entities(self):
        """Collect the entities that can be ranked and build the index maps.

        Entities come from both ``self.axioms`` and ``self.axioms_to_filter``;
        those without an entry in ``class_name_indexemb`` are logged and
        dropped. Heads and tails share the same candidate pool. The result
        is cached through ``self._loaded_ht_data``.
        """
        if self._loaded_ht_data:
            return

        ents, _ = Edge.getEntitiesAndRelations(self.axioms)
        ents_filter, _ = Edge.getEntitiesAndRelations(self.axioms_to_filter)

        known = set()
        for entity in set(ents) | set(ents_filter):
            if entity in self.class_name_indexemb:
                known.add(entity)
            else:
                # Logged once per entity; heads and tails use the same pool.
                logging.info(
                    "Entity %s not present in the embeddings dictionary. Ignoring it.",
                    entity)

        # Separate set objects so that mutating one never affects the other.
        self.head_entities = known
        self.tail_entities = set(known)

        # name -> embedding index
        self.head_name_indexemb = {k: self.class_name_indexemb[k] for k in self.head_entities}
        self.tail_name_indexemb = {k: self.class_name_indexemb[k] for k in self.tail_entities}

        # embedding index -> row/column position in the scores matrix
        self.head_indexemb_indexsc = {
            emb: pos for pos, emb in enumerate(self.head_name_indexemb.values())}
        self.tail_indexemb_indexsc = {
            emb: pos for pos, emb in enumerate(self.tail_name_indexemb.values())}

        self._loaded_ht_data = True

    def _load_training_scores(self):
        """Return the (heads x tails) filter matrix, building it on first use.

        The matrix is all ones, except that every known
        ``axioms_to_filter`` pair is set to ``10000`` when filtered metrics
        are enabled. Multiplying raw scores by a row of this matrix pushes
        already-known pairs down the filtered ranking (assuming
        non-negative scores -- TODO confirm against the scoring function).
        """
        if self._loaded_training_scores:
            return self.training_scores

        self._load_head_tail_entities()

        training_scores = np.ones(
            (len(self.head_entities), len(self.tail_entities)), dtype=np.int32)

        if self._compute_filtered_metrics:
            # c must be a known head and d a known tail, otherwise the pair
            # has no cell in the matrix.
            for axiom in self.axioms_to_filter:
                c, _, d = axiom.astuple()
                if c not in self.head_entities or d not in self.tail_entities:
                    continue

                row = self.head_indexemb_indexsc[self.head_name_indexemb[c]]
                col = self.tail_indexemb_indexsc[self.tail_name_indexemb[d]]

                training_scores[row, col] = 10000

        logging.info("Training scores created")

        # Store the matrix here so the cached branch above is always valid,
        # regardless of what the caller does with the return value.
        self.training_scores = training_scores
        self._loaded_training_scores = True
        return training_scores

    def _init_axioms(self, axioms):
        """Project ``axioms`` to a list of ``http://interacts_with`` edges.

        Returns ``None`` when ``axioms`` is ``None``.
        """
        if axioms is None:
            return None

        projector = projector_factory("taxonomy_rels", relations=["http://interacts_with"])

        edges = projector.project(axioms)
        return edges  # List of Edges

    def compute_axiom_rank(self, axiom):
        """Rank the true tail of ``axiom`` among all candidate tails.

        :param axiom: Edge exposing ``astuple()`` -> ``(head, relation, tail)``.
        :returns: ``(rank, filtered_rank, number_of_candidate_tails)``, or
            ``(None, None, None)`` when the head or tail has no embedding.
        """
        self.training_scores = self._load_training_scores()

        c, r, d = axiom.astuple()

        if c not in self.head_entities or d not in self.tail_entities:
            return None, None, None

        # Embedding indices
        c_emb_idx = self.head_name_indexemb[c]
        d_emb_idx = self.tail_name_indexemb[d]

        # Scores matrix labels. The original split this tuple assignment
        # across two physical lines without a continuation, which made the
        # right-hand side a 1-tuple and raised ValueError on every call.
        c_sc_idx = self.head_indexemb_indexsc[c_emb_idx]
        d_sc_idx = self.tail_indexemb_indexsc[d_emb_idx]

        r = self.relation_name_indexemb[r]

        # One candidate triple per tail, in the same iteration order used
        # to build tail_indexemb_indexsc, so res[d_sc_idx] belongs to d.
        data = th.tensor([
            [c_emb_idx, r, self.tail_name_indexemb[x]] for x in
            self.tail_entities]).to(self.device)

        res = self.eval_method(data).squeeze().cpu().detach().numpy()

        index = rankdata(res, method='average')
        rank = index[d_sc_idx]

        findex = rankdata((res * self.training_scores[c_sc_idx, :]), method='average')
        frank = findex[d_sc_idx]

        return rank, frank, len(self.tail_entities)

‎mowl/models/boxsquaredel/model.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
2+
from mowl.nn import BoxSquaredELModule
3+
from mowl.base_models.elmodel import EmbeddingELModel
4+
from mowl.models.boxsquaredel.evaluate import BoxSquaredELPPIEvaluator
5+
import torch as th
6+
from torch import nn
7+
8+
9+
class BoxSquaredEL(EmbeddingELModel):
    """
    Implementation based on [peng2020]_.

    EL embedding model backed by :class:`mowl.nn.BoxSquaredELModule`.

    :param dataset: Dataset forwarded to the base model.
    :param embed_dim: Embedding dimension passed to the module.
    :param margin: Passed to the module as ``gamma``.
    :param reg_norm: Stored on the instance; not used by this class.
    :param learning_rate: Stored on the instance; not used by this class.
    :param epochs: Stored on the instance; not used by this class.
    :param batch_size: Forwarded to the base model.
    :param delta: Passed to the module as ``delta``.
    :param reg_factor: Passed to the module as ``reg_factor``.
    :param num_negs: Stored on the instance; not used by this class.
    :param model_filepath: Forwarded to the base model; the checkpoint is
        read from ``self.model_filepath`` by :meth:`load_best_model`.
    :param device: Device the module is moved to and onto which
        checkpoints are mapped when loading.
    """
    # NOTE(review): [peng2020]_ looks like it was copied from another model's
    # docstring -- confirm the correct citation key for this model.

    def __init__(self,
                 dataset,
                 embed_dim=50,
                 margin=0.02,
                 reg_norm=1,
                 learning_rate=0.001,
                 epochs=1000,
                 batch_size=4096 * 8,
                 delta=2.5,
                 reg_factor=0.2,
                 num_negs=4,
                 model_filepath=None,
                 device='cpu'
                 ):
        super().__init__(dataset, embed_dim, batch_size, extended=True,
                         model_filepath=model_filepath)

        self.margin = margin
        self.reg_norm = reg_norm
        self.delta = delta
        self.reg_factor = reg_factor
        self.num_negs = num_negs
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.device = device
        self._loaded = False
        # NOTE(review): the base class is initialised with extended=True and
        # the flag is then reset here; kept as in the original -- confirm intent.
        self.extended = False
        self.init_module()

    def init_module(self):
        """Create a fresh ``BoxSquaredELModule`` and move it to ``self.device``."""
        self.module = BoxSquaredELModule(
            len(self.class_index_dict),
            len(self.object_property_index_dict),
            embed_dim=self.embed_dim,
            gamma=self.margin,
            delta=self.delta,
            reg_factor=self.reg_factor
        ).to(self.device)

    def train(self):
        """Training is not provided by this class; subclasses must implement it."""
        raise NotImplementedError

    def eval_method(self, data):
        """Score ``data`` with the module's GCI2 scoring function."""
        return self.module.gci2_score(data)

    def get_embeddings(self):
        """Load the saved model and return its class and relation embeddings.

        :returns: ``(ent_embeds, rel_embeds)``. Each dictionary pairs the
            keys of ``class_index_dict`` / ``object_property_index_dict``
            with successive rows of the matching embedding matrix (assumes
            key order matches the embedding row index -- TODO confirm).
        """
        print('Load the best model', self.model_filepath)
        # load_best_model() already re-initialises the module, so no
        # separate init_module() call is needed here.
        self.load_best_model()

        class_vectors = self.module.class_embed.weight.cpu().detach().numpy()
        rel_vectors = self.module.rel_embed.weight.cpu().detach().numpy()

        ent_embeds = dict(zip(self.class_index_dict.keys(), class_vectors))
        rel_embeds = dict(zip(self.object_property_index_dict.keys(), rel_vectors))
        return ent_embeds, rel_embeds

    def load_best_model(self):
        """Re-create the module, load ``self.model_filepath`` and switch to eval mode."""
        self.init_module()
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored on the device this model was configured with.
        state_dict = th.load(self.model_filepath, map_location=self.device)
        self.module.load_state_dict(state_dict)
        self.module.eval()
77+

0 commit comments

Comments
 (0)
Please sign in to comment.