Skip to content

Inductive #47

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 16, 2023
Merged
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7a87b17
:memo:
ferzcam Feb 23, 2023
18ce5fd
:sparkles: :construction: #43 added base graph model to encapsulate g…
ferzcam Feb 23, 2023
f4a47a2
test the inductive method - dl2vec (fixes #43)
azzatha Feb 24, 2023
3532dcd
:fire: delete unused directories
ferzcam Feb 24, 2023
b43ffc1
Delete mowl/lib directory
ferzcam Feb 24, 2023
a2da655
:construction: :art: #43 Added model.add_axioms method. Tested on Emb…
ferzcam Feb 24, 2023
ef5d06e
:memo: :construction: updating docs
ferzcam Feb 24, 2023
d6ba423
:construction: :art: #43 working on graph-based model
ferzcam Feb 26, 2023
5f80607
:construction: :art: #43 add_axioms method is working for graph-based…
ferzcam Feb 26, 2023
54b7a00
:art: #43 added add_axioms method for Graph+PyKEEN models
ferzcam Feb 28, 2023
cca0e1d
#43 added missing files
ferzcam Mar 1, 2023
f4af22d
:test_tube: added failing tests for loading pretrained model
ferzcam Mar 7, 2023
f449b35
test the inductive method - dl2vec (fixes #43)
azzatha Mar 7, 2023
e074bb1
test the inductive method - dl2vec (fixes #43)
azzatha Mar 7, 2023
dbd9f4d
test the inductive method - dl2vec (fixes #43)
azzatha Mar 7, 2023
bd4fdc0
test the inductive method - dl2vec (fixes #43)
azzatha Mar 7, 2023
610da3b
test the inductive method - dl2vec (fixes #43)
azzatha Mar 7, 2023
67aa9ff
test the inductive method (fixes #43)
azzatha Mar 7, 2023
712219a
.
ferzcam Mar 20, 2023
d6ee3a3
:fire:
ferzcam Mar 20, 2023
0871e2e
:sparkles: #43 Added `SyntacticModel` in the base_models module. :whi…
ferzcam Mar 20, 2023
ea2b20f
:sparkles: #43 added from_pretrained method :white_check_mark:
ferzcam Mar 21, 2023
44a208c
:wrench: replaced nosetests to pytest to enable python 3.10 compatibi…
ferzcam Apr 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
#43 added missing files
  • Loading branch information
ferzcam committed Mar 1, 2023
commit cca0e1de4c7f8343a8ea99f27da5353b35d2550f
137 changes: 137 additions & 0 deletions mowl/models/graph_kge/graph_pykeen_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from pykeen.models import ERModel

from mowl.base_models import KGEModel
from mowl.projection import Edge
import torch as th
import copy
import numpy as np
from pykeen.nn.init import PretrainedInitializer

class GraphPlusPyKEENModel(KGEModel):
    """Graph-based model whose embeddings are provided by a PyKEEN model.

    The edges of the projected graph are converted into a PyKEEN triples
    factory, and a PyKEEN ``ERModel`` built on that factory supplies the
    entity and relation representations that are exposed as embeddings.

    ``self.edges``, ``self.graph_node_to_id``, ``self.graph_relation_to_id``,
    ``self.kge_method``, ``self.dataset`` and ``self._load_edges`` are not
    defined in this class; they are presumably inherited from ``KGEModel``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``KGEModel`` and reset the cached state."""
        super().__init__(*args, **kwargs)

        self._triples_factory = None
        self._kge_method = None

    def _build_triples_factory(self):
        """Create a PyKEEN triples factory from the current graph edges."""
        return Edge.as_pykeen(
            self.edges,
            entity_to_id=self.graph_node_to_id,
            relation_to_id=self.graph_relation_to_id,
            create_inverse_triples=False,
        )

    @property
    def triples_factory(self):
        """PyKEEN triples factory for the graph edges, built on first access.

        On the first build, the id mappings of the factory are stored back on
        ``_graph_node_to_id`` and ``_graph_relation_to_id``.
        """
        if self._triples_factory is None:
            self._triples_factory = self._build_triples_factory()
            self._graph_node_to_id = self._triples_factory.entity_to_id
            self._graph_relation_to_id = self._triples_factory.relation_to_id
        return self._triples_factory

    def _embeddings_for(self, names, *, relations=False):
        """Return ``{name: vector}`` for every name known to the factory.

        :param names: iterable of entity (or relation) names to look up.
        :param relations: when true, use the relation mapping and the relation
            representations instead of the entity ones.
        :returns: dictionary from name to a NumPy embedding vector. Names that
            are missing from the triples factory are left out.
        """
        factory = self.triples_factory
        name_to_id = factory.relation_to_id if relations else factory.entity_to_id
        selected = {name: name_to_id[name] for name in names if name in name_to_id}
        if not selected:
            # An empty index list would become a float tensor, which is not a
            # valid index for the representation; there is nothing to fetch.
            return {}

        kge_method = self.kge_method
        if relations:
            representation = kge_method.relation_representations[0]
        else:
            representation = kge_method.entity_representations[0]

        idxs = th.tensor(list(selected.values()))
        vectors = representation(indices=idxs).cpu().detach().numpy()
        return dict(zip(selected, vectors))

    @property
    def class_embeddings(self):
        """Embeddings of the dataset classes that appear in the graph."""
        classes = self.dataset.classes.as_str
        if len(classes) == 0:
            return {}
        return self._embeddings_for(classes)

    @property
    def object_property_embeddings(self):
        """Embeddings of the graph relations known to the triples factory."""
        object_properties = self.graph_relation_to_id.keys()
        if len(object_properties) == 0:
            return {}
        return self._embeddings_for(object_properties, relations=True)

    @property
    def individual_embeddings(self):
        """Embeddings of the dataset individuals that appear in the graph."""
        individuals = self.dataset.individuals.as_str
        if len(individuals) == 0:
            return {}
        return self._embeddings_for(individuals)

    def set_kge_method(self, kge_method, *args, **kwargs):
        """Instantiate ``kge_method`` on the triples factory and store it.

        :param kge_method: callable (typically a PyKEEN model class) that is
            called with ``triples_factory=...`` plus ``*args``/``**kwargs``
            and must return a ``pykeen.models.ERModel`` instance.
        :raises TypeError: if calling ``kge_method`` raises ``TypeError`` or
            if the result is not an ``ERModel``.
        """
        error_message = (
            "Parameter 'kge_method' must be a pykeen.models.ERModel object. "
            f"Got {type(kge_method)} instead."
        )

        # Built outside the ``try`` so that a TypeError raised while creating
        # the triples factory is not reported as a bad ``kge_method``.
        triples_factory = self.triples_factory
        try:
            initialized_kge_method = kge_method(
                *args, triples_factory=triples_factory, **kwargs
            )
        except TypeError as err:
            raise TypeError(error_message) from err

        if not isinstance(initialized_kge_method, ERModel):
            raise TypeError(error_message)

        # State is only updated once the method has been validated, so a
        # rejected argument never replaces a previously configured method.
        self._kge_method_uninitialized = kge_method
        self._kge_method = initialized_kge_method
        self._kge_method_args = args
        self._kge_method_kwargs = kwargs

    @staticmethod
    def _initial_embeddings(name_to_id, previous_embeddings, representation):
        """Build the id-ordered initial embedding matrix for a rebuilt model.

        :param name_to_id: mapping from name to integer id in the new graph.
        :param previous_embeddings: dictionaries ``{name: vector}`` searched in
            order for an already known vector.
        :param representation: representation of the current model; only used
            to read the embedding dimension when a name is new.
        :returns: tensor with one row per entry of ``name_to_id``.
        """
        dimension = None
        rows = []
        # Rows are emitted in id order so that row ``i`` belongs to id ``i``
        # instead of depending on the iteration order of the dictionary.
        for name, _ in sorted(name_to_id.items(), key=lambda item: item[1]):
            for known in previous_embeddings:
                if name in known:
                    rows.append(known[name])
                    break
            else:
                if dimension is None:
                    # Computed once, on demand: this evaluates the whole
                    # representation just to read its second dimension.
                    dimension = representation(indices=None).shape[1]
                rows.append(np.random.normal(size=dimension))
        return th.tensor(np.asarray(rows))

    def add_axioms(self, *axioms):
        """Add axioms to the dataset and rebuild the PyKEEN model.

        The graph edges and the triples factory are recomputed. Entities and
        relations that already had an embedding keep it; new ones start from
        a normally distributed random vector. The stored ``kge_method`` is
        then instantiated again with these vectors as pretrained initializers.

        :param axioms: axioms forwarded to ``self.dataset.add_axioms``.
        """
        prev_class_embeds = copy.deepcopy(self.class_embeddings)
        prev_object_property_embeds = copy.deepcopy(self.object_property_embeddings)
        prev_individual_embeds = copy.deepcopy(self.individual_embeddings)
        print(f"Number of classes before adding axioms: {len(prev_class_embeds)}")
        print(f"Number of object properties before adding axioms: {len(prev_object_property_embeds)}")
        print(f"Number of individuals before adding axioms: {len(prev_individual_embeds)}")

        self.dataset.add_axioms(*axioms)
        self._load_edges()
        self._triples_factory = self._build_triples_factory()

        kge_method = self.kge_method
        pretrained_cls_embeddings = self._initial_embeddings(
            self.graph_node_to_id,
            (prev_class_embeds, prev_individual_embeds),
            kge_method.entity_representations[0],
        )
        pretrained_op_embeddings = self._initial_embeddings(
            self.graph_relation_to_id,
            (prev_object_property_embeds,),
            kge_method.relation_representations[0],
        )

        self._kge_method = self._kge_method_uninitialized(
            *self._kge_method_args,
            triples_factory=self.triples_factory,
            entity_initializer=PretrainedInitializer(tensor=pretrained_cls_embeddings),
            relation_initializer=PretrainedInitializer(tensor=pretrained_op_embeddings),
            **self._kge_method_kwargs,
        )
94 changes: 94 additions & 0 deletions mowl/models/graph_random_walk/random_walk_w2v_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from mowl.base_models.graph_model import RandomWalkModel
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
import mowl.error.messages as msg

class RandomWalkPlusW2VModel(RandomWalkModel):
    """Random-walk model whose embeddings are learned with gensim Word2Vec.

    The ontology is projected into edges, random walks are generated over
    them, and the walks are used as sentences to train a Word2Vec model.
    ``self.projector``, ``self.walker`` and ``self.dataset`` are not defined
    in this class; they are presumably provided by ``RandomWalkModel``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``RandomWalkModel`` and reset the state."""
        super().__init__(*args, **kwargs)

        self._edges = None
        self.w2v_model = None
        # Passed as ``update`` to ``build_vocab``; switched on by add_axioms.
        self.update_w2v_model = False

    def _trained_vectors(self):
        """Return the word vectors of the Word2Vec model.

        :raises AttributeError: if no Word2Vec model has been set or if the
            model holds no vectors yet.
        """
        if self.w2v_model is None:
            raise AttributeError(msg.W2V_MODEL_NOT_SET)
        vectors = self.w2v_model.wv
        if len(vectors) == 0:
            raise AttributeError(msg.RANDOM_WALK_MODEL_EMBEDDINGS_NOT_FOUND)
        return vectors

    @staticmethod
    def _select(vectors, names):
        """Return ``{name: vector}`` for the names present in ``vectors``."""
        return {name: vectors[name] for name in names if name in vectors}

    @property
    def class_embeddings(self):
        """Word2Vec vectors of the dataset classes found in the vocabulary."""
        vectors = self._trained_vectors()
        return self._select(vectors, self.dataset.classes.as_str)

    @property
    def object_property_embeddings(self):
        """Word2Vec vectors of the object properties found in the vocabulary."""
        vectors = self._trained_vectors()
        return self._select(vectors, self.dataset.object_properties.as_str)

    @property
    def individual_embeddings(self):
        """Word2Vec vectors of the individuals found in the vocabulary."""
        vectors = self._trained_vectors()
        # The previous implementation stored these vectors under an undefined
        # name and raised NameError as soon as one individual had a vector.
        return self._select(vectors, self.dataset.individuals.as_str)

    def set_w2v_model(self, *args, **kwargs):
        """Create the Word2Vec model; arguments go to ``Word2Vec`` unchanged."""
        self.w2v_model = Word2Vec(*args, **kwargs)

    def train(self, epochs=None):
        """Train the Word2Vec model on the random walks.

        :param epochs: number of training epochs; defaults to the ``epochs``
            value of the Word2Vec model. With ``0`` only the vocabulary is
            built.
        :raises AttributeError: if the projector, the walker or the Word2Vec
            model has not been set.
        """
        if self.projector is None:
            raise AttributeError(msg.GRAPH_MODEL_PROJECTOR_NOT_SET)
        if self.walker is None:
            raise AttributeError(msg.RANDOM_WALK_MODEL_WALKER_NOT_SET)
        if self.w2v_model is None:
            raise AttributeError(msg.W2V_MODEL_NOT_SET)
        if epochs is None:
            epochs = self.w2v_model.epochs

        if self._edges is None:
            self._edges = self.projector.project(self.dataset.ontology)
            # Walks are only generated here when no edges existed yet; after
            # add_axioms the walker output already holds the walks to train on.
            self.walker.walk(self._edges)

        sentences = LineSentence(self.walker.outfile)
        self.w2v_model.build_vocab(sentences, update=self.update_w2v_model)
        if epochs > 0:
            self.w2v_model.train(
                sentences,
                total_examples=self.w2v_model.corpus_count,
                epochs=epochs,
            )

    def add_axioms(self, *axioms):
        """Add axioms to the dataset and walk from the entities they mention.

        The ontology is projected again, walks are generated with the
        entities in the signature of the new axioms as nodes of interest, and
        the next call to ``train`` updates the existing vocabulary.

        :param axioms: axioms exposing ``getClassesInSignature``,
            ``getObjectPropertiesInSignature`` and
            ``getIndividualsInSignature``.
        :raises AttributeError: if the projector or the walker has not been
            set.
        """
        # Checked before anything is changed so that a misconfigured model
        # does not leave the dataset extended without matching walks.
        if self.projector is None:
            raise AttributeError(msg.GRAPH_MODEL_PROJECTOR_NOT_SET)
        if self.walker is None:
            raise AttributeError(msg.RANDOM_WALK_MODEL_WALKER_NOT_SET)

        classes = set()
        object_properties = set()
        individuals = set()
        for axiom in axioms:
            classes.update(axiom.getClassesInSignature())
            object_properties.update(axiom.getObjectPropertiesInSignature())
            individuals.update(axiom.getIndividualsInSignature())

        new_entities = list(classes | object_properties | individuals)

        self.dataset.add_axioms(*axioms)
        self._edges = self.projector.project(self.dataset.ontology)
        self.walker.walk(self._edges, nodes_of_interest=new_entities)
        self.update_w2v_model = True


14 changes: 14 additions & 0 deletions mowl/utils/random.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import random
import torch as th
import numpy as np
import os

def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports the seed as ``PYTHONHASHSEED`` and switches cuDNN to
    deterministic mode with benchmarking disabled.

    :param seed: seed value passed to every generator, defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        th.manual_seed,
        th.cuda.manual_seed,
        th.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = th.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
37 changes: 37 additions & 0 deletions tests/models/test_graph_pykeen_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from unittest import TestCase
from tests.datasetFactory import FamilyDataset
from mowl.models import GraphPlusPyKEENModel
from mowl.projection import TaxonomyProjector
from pykeen.triples import TriplesFactory
from pykeen.models import TransE, ERModel


class TestPyKEENModel(TestCase):
    """Tests for ``GraphPlusPyKEENModel`` on the family dataset."""

    @classmethod
    def setUpClass(cls):
        """Create the dataset once for all tests of the class."""
        cls.dataset = FamilyDataset()

    def _new_model(self):
        """Return a fresh model with a taxonomy projector attached."""
        model = GraphPlusPyKEENModel(self.dataset)
        model.set_projector(TaxonomyProjector())
        return model

    def test_get_triples_factory(self):
        """The ``triples_factory`` property is a PyKEEN ``TriplesFactory``."""
        model = self._new_model()

        self.assertIsInstance(model.triples_factory, TriplesFactory)

    def test_set_kge_method(self):
        """This should check the behaviour of the set_kge method"""
        model = self._new_model()
        expected_error = "Parameter 'kge_method' must be a pykeen.models.ERModel object"

        # A value that is not a PyKEEN model must be rejected.
        with self.assertRaisesRegex(TypeError, expected_error):
            model.set_kge_method(1)

        # A PyKEEN model class must be accepted and instantiated.
        model.set_kge_method(TransE)
        self.assertIsInstance(model.kge_method, ERModel)


60 changes: 60 additions & 0 deletions tests/models/test_graph_rw_w2v_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from unittest import TestCase
from tests.datasetFactory import FamilyDataset
from mowl.models import RandomWalkPlusW2VModel
from mowl.projection import TaxonomyProjector
from mowl.walking import DeepWalk
import mowl.error.messages as msg


class TestRandomWalkPlusW2VModel(TestCase):
    """Tests for the error handling of ``RandomWalkPlusW2VModel``."""

    # Names of the three embedding properties checked by the access tests.
    EMBEDDING_PROPERTIES = (
        "class_embeddings",
        "object_property_embeddings",
        "individual_embeddings",
    )

    @classmethod
    def setUpClass(cls):
        """Create the dataset once for all tests of the class."""
        cls.dataset = FamilyDataset()

    def _assert_embeddings_raise(self, model, message):
        """Check that reading each embedding property raises ``message``."""
        for property_name in self.EMBEDDING_PROPERTIES:
            with self.assertRaisesRegex(AttributeError, message):
                getattr(model, property_name)

    def test_rw_w2v_no_w2v_access_embeddings_error(self):
        """This should test that correct errors are raised when the RandomWalkPlusW2VModel does not have a w2v model"""
        model = RandomWalkPlusW2VModel(self.dataset)

        self._assert_embeddings_raise(model, msg.W2V_MODEL_NOT_SET)

    def test_rw_w2v_not_trained_access_embeddings_error(self):
        """This should test that correct errors are raised when the RandomWalkPlusW2VModel is not trained"""
        model = RandomWalkPlusW2VModel(self.dataset)
        model.set_w2v_model()

        self._assert_embeddings_raise(model, msg.RANDOM_WALK_MODEL_EMBEDDINGS_NOT_FOUND)

    def test_train_method_error(self):
        """This should test that model cannot be trained without projector, walker set and w2v model set"""
        model = RandomWalkPlusW2VModel(self.dataset)

        # Nothing configured yet: the projector is reported first.
        with self.assertRaisesRegex(AttributeError, msg.GRAPH_MODEL_PROJECTOR_NOT_SET):
            model.train()

        # Projector set: the walker is reported next.
        model.set_projector(TaxonomyProjector())
        with self.assertRaisesRegex(AttributeError, msg.RANDOM_WALK_MODEL_WALKER_NOT_SET):
            model.train()

        # Projector and walker set: the Word2Vec model is reported last.
        model.set_walker(DeepWalk(1, 1))
        with self.assertRaisesRegex(AttributeError, msg.W2V_MODEL_NOT_SET):
            model.train()