🩹 👾 Fix ERMLP functional form (pykeen#444)

mberr · cthoyt · web-flow · commit 0d396bbb005a · 2021-06-10T21:08:22.000+02:00
* Fix ERMLP functional form
* Add tests for batch_size=1 for all score_* methods
* Skip batch_size=1 for interaction modules that contain BatchNorm layers
* Add external type stub packages for mypy
This is necessary as of mypy 0.900, which no longer bundles third-party stubs

Co-authored-by: Charles Tapley Hoyt <cthoyt@gmail.com>
diff --git a/src/pykeen/models/resolve.py b/src/pykeen/models/resolve.py
@@ -63,6 +63,7 @@
 from .nbase import ERModel, EmbeddingSpecificationHint
 from ..nn.emb import EmbeddingSpecification, RepresentationModule
 from ..nn.modules import Interaction, interaction_resolver
+from ..typing import HeadRepresentation, RelationRepresentation, TailRepresentation
 
 __all__ = [
     'make_model',
@@ -74,7 +75,11 @@
 
 def make_model(
     dimensions: Union[int, Mapping[str, int]],
-    interaction: Union[str, Interaction, Type[Interaction]],
+    interaction: Union[
+        str,
+        Interaction[HeadRepresentation, RelationRepresentation, TailRepresentation],
+        Type[Interaction[HeadRepresentation, RelationRepresentation, TailRepresentation]],
+    ],
     interaction_kwargs: Optional[Mapping[str, Any]] = None,
     entity_representations: EmbeddingSpecificationHint = None,
     relation_representations: EmbeddingSpecificationHint = None,
@@ -104,7 +109,10 @@ def __str__(self):
 
 def make_model_cls(
     dimensions: Union[int, Mapping[str, int]],
-    interaction: Union[str, Interaction, Type[Interaction]],
+    interaction: Union[
+        str, Interaction[HeadRepresentation, RelationRepresentation, TailRepresentation],
+        Type[Interaction[HeadRepresentation, RelationRepresentation, TailRepresentation]],
+    ],
     interaction_kwargs: Optional[Mapping[str, Any]] = None,
     entity_representations: EmbeddingSpecificationHint = None,
     relation_representations: EmbeddingSpecificationHint = None,
@@ -117,15 +125,15 @@ def make_model_cls(
 
     entity_representations, relation_representations = _normalize_entity_representations(
         dimensions=dimensions,
-        interaction=interaction_instance.__class__,
+        interaction=interaction_instance.__class__,  # type: ignore
         entity_representations=entity_representations,
         relation_representations=relation_representations,
     )
 
     # TODO pack/unpack dimensions as default kwargs such that they don't actually need to be used
     #  to create the class
 
-    class ChildERModel(ERModel):
+    class ChildERModel(ERModel[HeadRepresentation, RelationRepresentation, TailRepresentation]):
         def __init__(self, **kwargs) -> None:
             """Initialize the model."""
             super().__init__(
@@ -142,7 +150,7 @@ def __init__(self, **kwargs) -> None:
 
 def _normalize_entity_representations(
     dimensions: Union[int, Mapping[str, int]],
-    interaction: Type[Interaction],
+    interaction: Type[Interaction[HeadRepresentation, RelationRepresentation, TailRepresentation]],
     entity_representations: EmbeddingSpecificationHint,
     relation_representations: EmbeddingSpecificationHint,
 ) -> Tuple[
diff --git a/src/pykeen/nn/functional.py b/src/pykeen/nn/functional.py
@@ -11,7 +11,6 @@
 from __future__ import annotations
 
 import functools
-from dataclasses import dataclass
 from typing import Optional, Tuple, Union
 
 import numpy
@@ -54,52 +53,6 @@
 ]
 
 
-@dataclass
-class SizeInformation:
-    """Size information of generic score function."""
-
-    #: The batch size of the head representations.
-    bh: int
-
-    #: The number of head representations per batch
-    nh: int
-
-    #: The batch size of the relation representations.
-    br: int
-
-    #: The number of relation representations per batch
-    nr: int
-
-    #: The batch size of the tail representations.
-    bt: int
-
-    #: The number of tail representations per batch
-    nt: int
-
-    @property
-    def same(self) -> bool:
-        """Whether all representations have the same shape."""
-        return (
-            self.bh == self.br
-            and self.bh == self.bt
-            and self.nh == self.nr
-            and self.nh == self.nt
-        )
-
-    @classmethod
-    def extract(
-        cls,
-        h: torch.Tensor,
-        r: torch.Tensor,
-        t: torch.Tensor,
-    ) -> SizeInformation:
-        """Extract size information from tensors."""
-        bh, nh = h.shape[:2]
-        br, nr = r.shape[:2]
-        bt, nt = t.shape[:2]
-        return cls(bh=bh, nh=nh, br=br, nr=nr, bt=bt, nt=nt)
-
-
 def _extract_sizes(
     h: torch.Tensor,
     r: torch.Tensor,
@@ -347,13 +300,11 @@ def ermlp_interaction(
     :return: shape: (batch_size, num_heads, num_relations, num_tails)
         The scores.
     """
-    sizes = SizeInformation.extract(h, r, t)
-
     # same shape
-    if sizes.same:
+    if h.shape == r.shape and h.shape == t.shape:
         return final(activation(
             hidden(torch.cat([h, r, t], dim=-1).view(-1, 3 * h.shape[-1]))),
-        ).view(sizes.bh, sizes.nh, sizes.nr, sizes.nt)
+        ).view(*h.shape[:-1])
 
     hidden_dim = hidden.weight.shape[0]
     # split, shape: (embedding_dim, hidden_dim)
diff --git a/src/pykeen/pipeline/api.py b/src/pykeen/pipeline/api.py
@@ -175,7 +175,7 @@
 import pickle
 import time
 from dataclasses import dataclass, field
-from typing import Any, Collection, Dict, Iterable, List, Mapping, MutableMapping, Optional, Type, Union
+from typing import Any, Collection, Dict, Iterable, List, Mapping, MutableMapping, Optional, Type, Union, cast
 
 import pandas as pd
 import torch
@@ -881,7 +881,7 @@ def pipeline(  # noqa: C901
         )
 
     if isinstance(model, Model):
-        model_instance = model
+        model_instance = cast(Model, model)
         # TODO should training be reset?
         # TODO should kwargs for loss and regularizer be checked and raised for?
     else:
diff --git a/tests/cases.py b/tests/cases.py
@@ -10,7 +10,9 @@
 import traceback
 import unittest
 from abc import ABC, abstractmethod
-from typing import Any, ClassVar, Collection, Dict, Mapping, MutableMapping, Optional, Sequence, Tuple, Type, TypeVar
+from typing import (
+    Any, ClassVar, Collection, Dict, Iterable, Mapping, MutableMapping, Optional, Sequence, Tuple, Type, TypeVar,
+)
 from unittest.case import SkipTest
 from unittest.mock import patch
 
@@ -40,7 +42,7 @@
 from pykeen.training import LCWATrainingLoop, SLCWATrainingLoop, TrainingLoop
 from pykeen.triples import TriplesFactory
 from pykeen.typing import HeadRepresentation, MappedTriples, RelationRepresentation, TailRepresentation
-from pykeen.utils import all_in_bounds, resolve_device, set_random_seed, unpack_singletons
+from pykeen.utils import all_in_bounds, get_batchnorm_modules, resolve_device, set_random_seed, unpack_singletons
 from tests.constants import EPSILON
 from tests.mocks import CustomRepresentations
 from tests.utils import rand
@@ -380,25 +382,34 @@ def _check_scores(self, scores: torch.FloatTensor, exp_shape: Tuple[int, ...]):
     def _additional_score_checks(self, scores):
         """Additional checks for scores."""
 
+    @property
+    def _score_batch_sizes(self) -> Iterable[int]:
+        """Return the list of batch sizes to test."""
+        if get_batchnorm_modules(self.instance):
+            return [self.batch_size]
+        return [1, self.batch_size]
+
     def test_score_hrt(self):
         """Test score_hrt."""
-        h, r, t = self._get_hrt(
-            (self.batch_size,),
-            (self.batch_size,),
-            (self.batch_size,),
-        )
-        scores = self.instance.score_hrt(h=h, r=r, t=t)
-        self._check_scores(scores=scores, exp_shape=(self.batch_size, 1))
+        for batch_size in self._score_batch_sizes:
+            h, r, t = self._get_hrt(
+                (batch_size,),
+                (batch_size,),
+                (batch_size,),
+            )
+            scores = self.instance.score_hrt(h=h, r=r, t=t)
+            self._check_scores(scores=scores, exp_shape=(batch_size, 1))
 
     def test_score_h(self):
         """Test score_h."""
-        h, r, t = self._get_hrt(
-            (self.num_entities,),
-            (self.batch_size,),
-            (self.batch_size,),
-        )
-        scores = self.instance.score_h(all_entities=h, r=r, t=t)
-        self._check_scores(scores=scores, exp_shape=(self.batch_size, self.num_entities))
+        for batch_size in self._score_batch_sizes:
+            h, r, t = self._get_hrt(
+                (self.num_entities,),
+                (batch_size,),
+                (batch_size,),
+            )
+            scores = self.instance.score_h(all_entities=h, r=r, t=t)
+            self._check_scores(scores=scores, exp_shape=(batch_size, self.num_entities))
 
     def test_score_h_slicing(self):
         """Test score_h with slicing."""
@@ -415,17 +426,18 @@ def test_score_h_slicing(self):
 
     def test_score_r(self):
         """Test score_r."""
-        h, r, t = self._get_hrt(
-            (self.batch_size,),
-            (self.num_relations,),
-            (self.batch_size,),
-        )
-        scores = self.instance.score_r(h=h, all_relations=r, t=t)
-        if len(self.cls.relation_shape) == 0:
-            exp_shape = (self.batch_size, 1)
-        else:
-            exp_shape = (self.batch_size, self.num_relations)
-        self._check_scores(scores=scores, exp_shape=exp_shape)
+        for batch_size in self._score_batch_sizes:
+            h, r, t = self._get_hrt(
+                (batch_size,),
+                (self.num_relations,),
+                (batch_size,),
+            )
+            scores = self.instance.score_r(h=h, all_relations=r, t=t)
+            if len(self.cls.relation_shape) == 0:
+                exp_shape = (batch_size, 1)
+            else:
+                exp_shape = (batch_size, self.num_relations)
+            self._check_scores(scores=scores, exp_shape=exp_shape)
 
     def test_score_r_slicing(self):
         """Test score_r with slicing."""
@@ -444,13 +456,14 @@ def test_score_r_slicing(self):
 
     def test_score_t(self):
         """Test score_t."""
-        h, r, t = self._get_hrt(
-            (self.batch_size,),
-            (self.batch_size,),
-            (self.num_entities,),
-        )
-        scores = self.instance.score_t(h=h, r=r, all_entities=t)
-        self._check_scores(scores=scores, exp_shape=(self.batch_size, self.num_entities))
+        for batch_size in self._score_batch_sizes:
+            h, r, t = self._get_hrt(
+                (batch_size,),
+                (batch_size,),
+                (self.num_entities,),
+            )
+            scores = self.instance.score_t(h=h, r=r, all_entities=t)
+            self._check_scores(scores=scores, exp_shape=(batch_size, self.num_entities))
 
     def test_score_t_slicing(self):
         """Test score_t with slicing."""
diff --git a/tox.ini b/tox.ini
@@ -148,7 +148,12 @@ commands =
 description = Check all python files do not have mistaken trailing commas
 
 [testenv:mypy]
-deps = mypy
+deps =
+    mypy
+    types-click
+    types-pkg_resources
+    types-requests
+    types-tabulate
 skip_install = true
 commands = mypy --ignore-missing-imports \
     src/pykeen/datasets \