🐛 📝 fixed bugs captured by documentation examples

bio-ontology-research-group · ferzcam · Jul 30, 2024 · Jun 11, 2024 · Jun 12, 2024 · Jul 16, 2024
commit 850a9877c571179957f54594dee807f61e021514
diff --git a/docs/source/examples/elmodels/plot_1_elembeddings.ipynb b/docs/source/examples/elmodels/plot_1_elembeddings.ipynb
@@ -1,16 +1,5 @@
 {
   "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "%matplotlib inline"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {},
@@ -47,7 +36,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "## EL-Embeddings (PyTorch) module.\n\nEL-Embeddings defines a geometric modelling for all the GCIs in the EL language.\nThe implementation of ELEmbeddings module can be found at :class:`mowl.nn.el.elem.module.ELEmModule`.\n\n## EL-Embeddings model\n\nThe module :class:`mowl.nn.el.elem.module.ELEmModule` is used in the :class:`mowl.models.elembeddings.model.ELEmbeddings`.\nIn the use case of this example, we will test over a biological problem, which is\nprotein-protein interactions. Given two proteins $p_1,p_2$, the phenomenon\n\"$p_1$ interacts with $p_2$\" is encoded using GCI 2 as:\n\n\\begin{align}p_1 \\sqsubseteq interacts\\_with. p_2\\end{align}\n\nFor that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELEmPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.\n\n"
+        "## EL-Embeddings (PyTorch) module.\n\nEL-Embeddings defines a geometric modelling for all the GCIs in the EL language.\nThe implementation of ELEmbeddings module can be found at :class:`mowl.nn.el.elem.module.ELEmModule`.\n\n## EL-Embeddings model\n\nThe module :class:`mowl.nn.el.elem.module.ELEmModule` is used in the :class:`mowl.models.elembeddings.model.ELEmbeddings`.\nIn the use case of this example, we will test over a biological problem, which is\nprotein-protein interactions. Given two proteins $p_1,p_2$, the phenomenon\n\"$p_1$ interacts with $p_2$\" is encoded using GCI 2 as:\n\n\\begin{align}p_1 \\sqsubseteq \\exists interacts\\_with. p_2\\end{align}\n\nFor that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELEmPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.\n\n"
       ]
     },
     {
@@ -103,7 +92,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.8.16"
+      "version": "3.8.19"
     }
   },
   "nbformat": 4,

diff --git a/docs/source/examples/elmodels/plot_1_elembeddings.py b/docs/source/examples/elmodels/plot_1_elembeddings.py
@@ -55,7 +55,7 @@
 # ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
 #
 # .. math::
-#    p_1 \sqsubseteq interacts\_with. p_2
+#    p_1 \sqsubseteq \exists interacts\_with. p_2
 #
 # For that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELEmPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.
 

diff --git a/docs/source/examples/elmodels/plot_1_elembeddings.rst b/docs/source/examples/elmodels/plot_1_elembeddings.rst
diff --git a/docs/source/examples/elmodels/plot_2_elboxembeddings.ipynb b/docs/source/examples/elmodels/plot_2_elboxembeddings.ipynb
@@ -1,16 +1,5 @@
 {
   "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "%matplotlib inline"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {},
@@ -47,7 +36,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "## ELBoxEmbeddings model\n\nThe module :class:`mowl.nn.el.elem.module.ELBoxModule` is used in the :class:`mowl.models.elboxembeddings.model.ELBoxEmbeddings`.\nIn the use case of this example, we will test over a biological problem, which is\nprotein-protein interactions. Given two proteins $p_1,p_2$, the phenomenon\n\"$p_1$ interacts with $p_2$\" is encoded using GCI 2 as:\n\n\\begin{align}p_1 \\sqsubseteq interacts\\_with. p_2\\end{align}\n\nFor that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELBoxPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.\n\n"
+        "## ELBoxEmbeddings model\n\nThe module :class:`mowl.nn.el.elem.module.ELBoxModule` is used in the :class:`mowl.models.elboxembeddings.model.ELBoxEmbeddings`.\nIn the use case of this example, we will test over a biological problem, which is\nprotein-protein interactions. Given two proteins $p_1,p_2$, the phenomenon\n\"$p_1$ interacts with $p_2$\" is encoded using GCI 2 as:\n\n\\begin{align}p_1 \\sqsubseteq \\exists interacts\\_with. p_2\\end{align}\n\nFor that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELBoxPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.\n\n"
       ]
     },
     {
@@ -103,7 +92,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.8.16"
+      "version": "3.8.19"
     }
   },
   "nbformat": 4,

diff --git a/docs/source/examples/elmodels/plot_2_elboxembeddings.py b/docs/source/examples/elmodels/plot_2_elboxembeddings.py
@@ -50,7 +50,7 @@
 # ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
 #
 # .. math::
-#    p_1 \sqsubseteq interacts\_with. p_2
+#    p_1 \sqsubseteq \exists interacts\_with. p_2
 #
 # For that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELBoxPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.
 

diff --git a/docs/source/examples/elmodels/plot_2_elboxembeddings.rst b/docs/source/examples/elmodels/plot_2_elboxembeddings.rst
diff --git a/mowl/base_models/model.py b/mowl/base_models/model.py
@@ -2,6 +2,7 @@
 import tempfile
 from mowl.datasets import Dataset
 from mowl.owlapi import OWLAPIAdapter
+from mowl.evaluation import Evaluator
 from java.util import HashSet
 
 
@@ -28,13 +29,23 @@ def __init__(self, dataset, model_filepath=None):
         self.dataset = dataset
         self._model_filepath = model_filepath
         self._testing_set = None
-
+        self._evaluator = None
+        self._evaluation_model = None
+        self._metrics = None
 
     def train(self, *args, **kwargs):
         '''Abstract method for training the model. This method must be implemented in children classes
         '''
         raise NotImplementedError("Method train is not implemented.")
 
+    def evaluate(self, *args, **kwargs):
+        """Run the configured evaluator on ``evaluation_model``; store and return the metrics."""
+        if self._evaluator is None:
+            raise AttributeError("Evaluator is not set. Please set the evaluator before evaluating the model.")
+
+        self._metrics = self._evaluator.evaluate(self.evaluation_model)
+        return self._metrics
+
     def eval_fn(self, *args, **kwargs):
         raise NotImplementedError("Method eval_fn is not implemented.")
 
@@ -118,6 +129,21 @@ def individual_embeddings(self):
         """
         raise NotImplementedError()
 
+    @versionadded(version="1.0.0")
+    @property
+    def evaluation_model(self):
+        """Returns the evaluation model. In models relying on Word2Vec embeddings, this method calls an auxiliary evaluation model for scoring. Methods using KGEs or Geometric Embeddings would return the model itself. The base implementation always raises NotImplementedError."""
+        raise NotImplementedError("Method evaluation_model must be implemented in a subclass.")
+
+    @versionadded(version="1.0.0")
+    @property
+    def metrics(self):
+        """Metrics stored by the last evaluation; raises AttributeError if there are none."""
+        if self._metrics is None:
+            raise AttributeError("Model has not been evaluated yet.")
+        return self._metrics
+
+
     @versionadded(version="0.2.0")
     def add_axioms(self, *axioms):
         """
@@ -139,3 +165,20 @@ def from_pretrained(self, file_name):
         raise NotImplementedError()
 
 
+
+    @versionadded(version="1.0.0")
+    def set_evaluator(self, evaluator, *args, **kwargs):
+        """
+        This method sets the evaluator for the model.
+
+        :param evaluator: Evaluator instance, or an evaluator class/factory that is
+            called as ``evaluator(self.dataset, *args, **kwargs)``.
+        :type evaluator: mowl.evaluation.base.Evaluator
+        :param args: Extra positional arguments for the evaluator factory.
+        :param kwargs: Extra keyword arguments for the evaluator factory.
+        """
+        if isinstance(evaluator, Evaluator):
+            # Ready-made instance: stored as is, args/kwargs are not applied to it.
+            self._evaluator = evaluator
+        else:
+            self._evaluator = evaluator(self.dataset, *args, **kwargs)
diff --git a/mowl/evaluation/__init__.py b/mowl/evaluation/__init__.py
@@ -1,4 +1,5 @@
-from mowl.evaluation.base import Evaluator, AxiomsRankBasedEvaluator
-from mowl.evaluation.rank_based import RankBasedEvaluator, ModelRankBasedEvaluator, EmbeddingsRankBasedEvaluator
+from mowl.evaluation.base import Evaluator
+from mowl.evaluation.subsumption import SubsumptionEvaluator
+from mowl.evaluation.ppi import PPIEvaluator
 
 
diff --git a/mowl/evaluation/base.py b/mowl/evaluation/base.py
diff --git a/mowl/models/boxsquaredel/model.py b/mowl/models/boxsquaredel/model.py
@@ -1,7 +1,7 @@
 
 from mowl.nn import BoxSquaredELModule
 from mowl.base_models.elmodel import EmbeddingELModel
-from mowl.models.boxsquaredel.evaluate import BoxSquaredELPPIEvaluator
+
 import torch as th
 from torch import nn
 

diff --git a/mowl/models/elboxembeddings/examples/model_ppi.py b/mowl/models/elboxembeddings/examples/model_ppi.py
@@ -5,7 +5,7 @@
 import logging
 import numpy as np
 
-from mowl.models.elboxembeddings.evaluate import ELBoxEmbeddingsPPIEvaluator
+from mowl.evaluation import PPIEvaluator
 
 from tqdm import trange, tqdm
 
@@ -85,13 +85,9 @@ def evaluate_ppi(self):
         print('Load the best model', self.model_filepath)
         self.load_best_model()
         with th.no_grad():
-            eval_method = self.module.gci2_loss
+            metrics = self.evaluate()
+            print(metrics)
 
-            evaluator = ELBoxEmbeddingsPPIEvaluator(
-                self.dataset.testing, eval_method, self.dataset.ontology, self.class_index_dict,
-                self.object_property_index_dict, device=self.device)
-            evaluator()
-            evaluator.print_metrics()
 
 
 
diff --git a/mowl/models/elboxembeddings/model.py b/mowl/models/elboxembeddings/model.py
@@ -1,7 +1,7 @@
 
 from mowl.nn import ELBoxModule
 from mowl.base_models.elmodel import EmbeddingELModel
-from mowl.models.elboxembeddings.evaluate import ELBoxEmbeddingsPPIEvaluator
+from mowl.evaluation import PPIEvaluator
 
 import torch as th
 from torch import nn
@@ -35,6 +35,8 @@ def __init__(self,
         self.extended = False
         self.init_module()
 
+        self.set_evaluator(PPIEvaluator)
+
     def init_module(self):
         self.module = ELBoxModule(
             len(self.class_index_dict),

diff --git a/mowl/models/elembeddings/examples/model_ppi.py b/mowl/models/elembeddings/examples/model_ppi.py
@@ -1,6 +1,5 @@
 from mowl.base_models.elmodel import EmbeddingELModel
-
-from mowl.models.elembeddings.evaluate import ELEmbeddingsPPIEvaluator
+from mowl.evaluation import PPIEvaluator
 from mowl.projection.factory import projector_factory
 from tqdm import trange, tqdm
 import torch as th
@@ -16,7 +15,9 @@ class ELEmPPI(ELEmbeddings):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
+        self.set_evaluator(PPIEvaluator)
 
+
     def train(self, validate_every=1000):
 
         optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate)
@@ -74,11 +75,7 @@ def evaluate_ppi(self):
         print('Load the best model', self.model_filepath)
         self.load_best_model()
         with th.no_grad():
-            eval_method = self.module.gci2_loss
-
-            evaluator = ELEmbeddingsPPIEvaluator(
-                self.dataset.testing, eval_method, self.dataset.ontology, self.class_index_dict,
-                self.object_property_index_dict, device=self.device)
-            evaluator()
-            evaluator.print_metrics()
-
+            metrics = self.evaluate()
+            print(metrics)
+
+
diff --git a/mowl/models/syntactic/w2v_model.py b/mowl/models/syntactic/w2v_model.py
@@ -3,8 +3,19 @@
 from gensim.models import Word2Vec
 from gensim.models.word2vec import LineSentence
 import mowl.error.messages as msg
+import numpy as np
+import torch as th
 from deprecated.sphinx import versionadded
 
+import logging
+logger = logging.getLogger(__name__)
+# NOTE(review): attaching a handler and forcing INFO at import time affects every
+# application that imports this module; libraries usually leave this to the caller.
+handler = logging.StreamHandler()
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
 @versionadded(version="0.2.0")
 class SyntacticPlusW2VModel(SyntacticModel):
     """
@@ -17,6 +28,9 @@ def __init__(self, *args, **kwargs):
         self.update_w2v_model = False
         self._is_pretrained = False
 
+        self._evaluation_model = None
+        self.device = th.device("cuda" if th.cuda.is_available() else "cpu")
+
     @property
     def class_embeddings(self):
         if self.w2v_model is None:
@@ -54,9 +68,17 @@ def individual_embeddings(self):
         ind_embeds = {}
         for ind in self.dataset.individuals.as_str:
             if ind in self.w2v_model.wv:
-                obj_prop_embeds[ind] = self.w2v_model.wv[ind]
+                ind_embeds[ind] = self.w2v_model.wv[ind]
         return ind_embeds
-
+
+    @property
+    def evaluation_model(self):
+        """Build the :class:`EvaluationModel` on first access, cache it and return it."""
+        if self._evaluation_model is None:
+            self._evaluation_model = EvaluationModel(
+                self.w2v_model, self.dataset, self.embed_dim, self.device)
+        return self._evaluation_model
+
     def set_w2v_model(self, *args, **kwargs):
         """
         This method sets the :class:`gensim.models.word2vec.Word2Vec` model to be used in the syntactic model.
@@ -65,8 +87,10 @@ def set_w2v_model(self, *args, **kwargs):
         :param kwargs: Keyword arguments to be passed to the :class:`Word2Vec <gensim.models.word2vec.Word2Vec>` constructor.
         
         """
+
         self.w2v_model = Word2Vec(*args, **kwargs)
-
+        self.embed_dim = self.w2v_model.vector_size
+
     def train(self, epochs=None):
         """
         Triggers the Word2Vec training process.
@@ -123,3 +147,47 @@ def from_pretrained(self, model):
 
 
 
+class EvaluationModel(th.nn.Module):
+    """Scores pairs of class indices with Word2Vec embeddings (frozen lookup table).
+
+    :param w2v_model: Word2Vec model; vectors are read from ``w2v_model.wv``.
+    :param dataset: dataset whose ``classes.as_str`` order defines the row indices.
+    :param embedding_size: length of the random vector used for classes missing in ``wv``.
+    :param device: torch device the embedding table is moved to.
+    """
+
+    def __init__(self, w2v_model, dataset, embedding_size, device):
+        super().__init__()
+        self.embedding_size = embedding_size
+        self.device = device
+        self.embeddings = self.init_module(w2v_model, dataset)
+
+    def init_module(self, w2v_model, dataset):
+        """Build the embedding table, one row per class in ``dataset.classes.as_str``."""
+        w2v_vectors = w2v_model.wv
+        embeddings_list = []
+        for class_ in dataset.classes.as_str:
+            if class_ in w2v_vectors:
+                embeddings_list.append(w2v_vectors[class_])
+            else:
+                # Class absent from the vocabulary: use a random vector instead.
+                logger.warning("Class %s not found in w2v model", class_)
+                embeddings_list.append(np.random.rand(self.embedding_size))
+        # Pin float32 so one float64 fallback row cannot upcast the whole table.
+        embeddings = th.tensor(np.array(embeddings_list, dtype=np.float32))
+        return th.nn.Embedding.from_pretrained(embeddings.to(self.device))
+
+    def forward(self, data, *args, **kwargs):
+        """Return ``1 - sigmoid(x . y)`` per row of ``data`` (indices in columns 0 and 1)."""
+        x = data[:, 0]
+        y = data[:, 1]
+        logger.debug("X shape: %s", x.shape)
+        logger.debug("Y shape: %s", y.shape)
+        x = self.embeddings(x)
+        y = self.embeddings(y)
+        logger.debug("X shape: %s", x.shape)
+        logger.debug("Y shape: %s", y.shape)
+
+        dot_product = th.sum(x * y, dim=1)
+        logger.debug("Dot product shape: %s", dot_product.shape)
+        return 1 - th.sigmoid(dot_product)
diff --git a/mowl/nn/el/elbe/module.py b/mowl/nn/el/elbe/module.py
@@ -5,7 +5,7 @@
 from deprecated.sphinx import deprecated
 
 
-@deprecated(version="0.4.0", reason="Use ELBEModule instead")
+@deprecated(version="1.0.0", reason="Use ELBEModule instead")
 class ELBoxModule(ELModule):
     """Implementation of ELBoxEmbeddings from [peng2020]_.
     """

diff --git a/tests/inductive/test_syntactic_w2v_model.py b/tests/inductive/test_syntactic_w2v_model.py
@@ -6,7 +6,7 @@
 import mowl.error.messages as msg
 import os
 
-class TestRandomWalkModel(TestCase):
+class TestSyntacticModel(TestCase):
 
     @classmethod
     def setUpClass(self):