|
| 1 | +from mowl.evaluation.base import AxiomsRankBasedEvaluator |
| 2 | +from mowl.projection.factory import projector_factory |
| 3 | +from mowl.projection.edge import Edge |
| 4 | +import logging |
| 5 | +import numpy as np |
| 6 | +from scipy.stats import rankdata |
| 7 | +import torch as th |
| 8 | + |
| 9 | + |
class BoxSquaredELPPIEvaluator(AxiomsRankBasedEvaluator):
    """Rank-based evaluator over ``http://interacts_with`` edges.

    Axioms are projected into edges with the ``"taxonomy_rels"`` projector
    restricted to the ``http://interacts_with`` relation. For every edge
    ``(c, r, d)`` the evaluator scores ``(c, r, x)`` for each candidate tail
    ``x`` and reports the rank of ``d`` among the candidates, both raw and
    filtered.

    :param axioms: Axioms to evaluate; forwarded to the parent constructor.
    :param eval_method: Callable that receives a tensor whose rows are
        ``[head_index, relation_index, tail_index]`` and returns one score per
        row.
    :param axioms_to_filter: Axioms used to build the filtering matrix;
        forwarded to the parent constructor.
    :param class_name_indexemb: Mapping from class name to embedding index.
    :param rel_name_indexemb: Mapping from relation name to embedding index.
    :param device: Device the candidate tensor is moved to. Defaults to
        ``"cpu"``.
    :param verbose: Forwarded to the parent constructor.
    """

    # Multiplier applied to the scores of already-known axioms when computing
    # the filtered rank.
    _FILTER_PENALTY = 10000

    def __init__(
        self,
        axioms,
        eval_method,
        axioms_to_filter,
        class_name_indexemb,
        rel_name_indexemb,
        device="cpu",
        verbose=False
    ):
        super().__init__(axioms, eval_method, axioms_to_filter, device, verbose)

        self.class_name_indexemb = class_name_indexemb
        self.relation_name_indexemb = rel_name_indexemb

        # Lazy-initialisation flags.
        self._loaded_training_scores = False
        # NOTE(review): not read anywhere in this class; kept because code
        # outside this class may rely on it.
        self._loaded_eval_data = False
        self._loaded_ht_data = False

    def _load_head_tail_entities(self):
        """Collect candidate head/tail entities and their index mappings.

        Entities are gathered from both ``self.axioms`` and
        ``self.axioms_to_filter``; those without an entry in
        ``self.class_name_indexemb`` are logged and skipped. The work is done
        only once per instance.
        """
        if self._loaded_ht_data:
            return

        ents, _ = Edge.getEntitiesAndRelations(self.axioms)
        ents_filter, _ = Edge.getEntitiesAndRelations(self.axioms_to_filter)

        known_entities = set()
        for entity in set(ents) | set(ents_filter):
            if entity in self.class_name_indexemb:
                known_entities.add(entity)
            else:
                logging.info(
                    "Entity %s not present in the embeddings dictionary. Ignoring it.",
                    entity)

        # Heads and tails are drawn from the same pool of entities; they are
        # kept as two independent sets so neither aliases the other.
        self.head_entities = set(known_entities)
        self.tail_entities = set(known_entities)

        # name -> embedding index
        self.head_name_indexemb = {k: self.class_name_indexemb[k] for k in self.head_entities}
        self.tail_name_indexemb = {k: self.class_name_indexemb[k] for k in self.tail_entities}

        # embedding index -> row/column position in the score matrix
        self.head_indexemb_indexsc = {
            v: k for k, v in enumerate(self.head_name_indexemb.values())}
        self.tail_indexemb_indexsc = {
            v: k for k, v in enumerate(self.tail_name_indexemb.values())}

        self._loaded_ht_data = True

    def _load_training_scores(self):
        """Build, cache and return the filtering matrix.

        The matrix has shape ``(len(head_entities), len(tail_entities))`` and
        is filled with ones. When filtered metrics are enabled, each entry
        that corresponds to an axiom in ``self.axioms_to_filter`` is set to
        ``_FILTER_PENALTY``.

        :returns: The filtering matrix (also stored in
            ``self.training_scores``).
        :rtype: numpy.ndarray
        """
        if self._loaded_training_scores:
            return self.training_scores

        self._load_head_tail_entities()

        training_scores = np.ones((len(self.head_entities), len(self.tail_entities)),
                                  dtype=np.int32)

        if self._compute_filtered_metrics:
            # c must be a known head entity and d a known tail entity.
            for axiom in self.axioms_to_filter:
                c, _, d = axiom.astuple()
                if c not in self.head_entities or d not in self.tail_entities:
                    continue

                c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
                c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

                training_scores[c, d] = self._FILTER_PENALTY

        logging.info("Training scores created")

        # Store the matrix here so the cached branch above is valid no matter
        # which caller triggers the first load.
        self.training_scores = training_scores
        self._loaded_training_scores = True
        return training_scores

    def _init_axioms(self, axioms):
        """Project ``axioms`` into a list of ``http://interacts_with`` edges.

        :param axioms: Axioms to project, or ``None``.
        :returns: The projected edges, or ``None`` when ``axioms`` is ``None``.
        """
        if axioms is None:
            return None

        projector = projector_factory("taxonomy_rels", relations=["http://interacts_with"])

        edges = projector.project(axioms)
        return edges  # List of Edges

    def compute_axiom_rank(self, axiom):
        """Rank the tail of ``axiom`` among all candidate tails.

        :param axiom: Edge whose ``astuple()`` yields ``(head, relation, tail)``.
        :returns: ``(rank, filtered_rank, number_of_candidates)``, or
            ``(None, None, None)`` when the head or the tail has no embedding.
        :rtype: tuple
        """
        self.training_scores = self._load_training_scores()

        c, r, d = axiom.astuple()

        if c not in self.head_entities or d not in self.tail_entities:
            return None, None, None

        # Embedding indices
        c_emb_idx, d_emb_idx = self.head_name_indexemb[c], self.tail_name_indexemb[d]

        # Score-matrix positions. Written as two statements: the previous
        # single assignment ended in a bare trailing comma, which made the
        # right-hand side a 1-tuple and broke the two-name unpacking.
        c_sc_idx = self.head_indexemb_indexsc[c_emb_idx]
        d_sc_idx = self.tail_indexemb_indexsc[d_emb_idx]

        r = self.relation_name_indexemb[r]

        # Build the candidates in the same order that defined
        # ``tail_indexemb_indexsc`` so that ``res[i]`` belongs to column ``i``.
        data = th.tensor([
            [c_emb_idx, r, tail_emb_idx]
            for tail_emb_idx in self.tail_name_indexemb.values()]).to(self.device)

        # ``squeeze`` collapses a single-candidate result to a 0-d array;
        # ``atleast_1d`` keeps it indexable.
        res = np.atleast_1d(self.eval_method(data).squeeze().cpu().detach().numpy())

        # ``rankdata`` ranks in ascending order, so the lowest score gets rank 1.
        index = rankdata(res, method='average')
        rank = index[d_sc_idx]

        # NOTE(review): multiplying by the penalty only pushes known axioms
        # toward the end of the ranking if scores are non-negative and
        # lower-is-better -- confirm against ``eval_method``.
        findex = rankdata((res * self.training_scores[c_sc_idx, :]), method='average')
        frank = findex[d_sc_idx]

        return rank, frank, len(self.tail_entities)
0 commit comments