Skip to content

Develop #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 56 commits into from
Nov 18, 2024
Merged
Changes from 1 commit
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
6518593
:construction: return alc datasets
ferzcam Feb 19, 2024
9515e29
Merge branch 'ontoem' into alc
ferzcam Jun 2, 2024
17623b7
:bookmark: ontoem
ferzcam Jul 17, 2024
9960d98
:art: ontoem: added opa2vec-ppi
ferzcam Nov 4, 2024
4796a3c
:art: opa2vec ppi_yeast
ferzcam Nov 4, 2024
adadb28
:art: opa2vec-nn ppi_yeast
ferzcam Nov 4, 2024
d3e45bc
Merge branch 'main' into ontoem
ferzcam Nov 5, 2024
0503b14
⚗️ ":art: owl2vec*-sim ppi_yeast"
ferzcam Nov 5, 2024
5058dcd
:whiteband: set logging level to INFO for jvm
ferzcam Nov 5, 2024
c180dde
⚗️ Owl2vec* with KGs
ferzcam Nov 6, 2024
370b7c8
🐛 KGE triples factory includes validation and testing entities that a…
ferzcam Nov 6, 2024
694f3f8
🩹 Fix in wandb config
ferzcam Nov 6, 2024
735d0ef
⚗️ OPA2Vec: subsumption go and foodon
ferzcam Nov 10, 2024
ac484cc
⚗️ OPA2Vec-NN go and foodon
ferzcam Nov 10, 2024
3e8a478
⚗️ OPA2Vec-NN go and foodon
ferzcam Nov 10, 2024
d758ad4
🐛 Fixed FoodOn classes lookup
ferzcam Nov 10, 2024
42c3e16
⚗️ OWL2VecStar Sim GO and FoodOn
ferzcam Nov 10, 2024
e536be5
⚗️ OWL2Vec*-KG GO and FoodOn
ferzcam Nov 11, 2024
b00f1cc
:art: box2el subsumption
ferzcam Nov 11, 2024
b21894e
:art: box2el subsumption
ferzcam Nov 11, 2024
398c94d
🔀 Resolve conflicts
ferzcam Nov 11, 2024
7e247b8
⚰️ Clean code
ferzcam Nov 11, 2024
5126867
⚗️ ELEmbeddings subsumption
ferzcam Nov 11, 2024
1e6936c
⚗️ Run EL models for GO and FoodOn
ferzcam Nov 11, 2024
4d22982
🐛 Added negative logits to EL models
ferzcam Nov 11, 2024
885be2c
⚗️ BoxEL GO and FoodOn
ferzcam Nov 11, 2024
4dc6d19
⚗️ OPA2Vec PPI Human
ferzcam Nov 11, 2024
bf48ab5
⚗️ OPA2Vec PPI human
ferzcam Nov 11, 2024
f4bffc5
⚗️ OPA2Vec-NN PPI human
ferzcam Nov 11, 2024
9c83829
⚗️ OWL2VecStar Sim PPI Human
ferzcam Nov 12, 2024
7f4e0b4
⚗️ OWL2Vec*-KG PPI Human
ferzcam Nov 12, 2024
8ccbaed
⚗️ ELEmbeddings PPI Human
ferzcam Nov 12, 2024
ebf2e24
⚗️ BoxEL and Box2EL PPI Human
ferzcam Nov 12, 2024
b3cd4ed
🎨 CatE updated with latest version (NeSy 2024) (#59)
ferzcam Nov 13, 2024
c1d6f3e
⚗️ CatE GO
ferzcam Nov 13, 2024
5f5257e
⚗️ CatE PPI Yeast and Human
ferzcam Nov 13, 2024
9c808ae
🧐 Updated PPI dataset creation scripts
ferzcam Nov 13, 2024
964ab3c
:fire:
ferzcam Nov 13, 2024
c5279f9
🔧 Updated Python version support, Changelog, Readme...
ferzcam Nov 13, 2024
c1743e2
🔀 Fix merge conflicts between ontoem and develop
ferzcam Nov 13, 2024
cf60497
🩹 Minor changes
ferzcam Nov 14, 2024
11f2863
🔀 Resolve merging conflicts: ontoem --> develop
ferzcam Nov 14, 2024
a5b27a5
🐛 Fix multiple bindings to slf4j java logging library
ferzcam Nov 14, 2024
8622a99
✅ Updated tests for CatE (#59)
ferzcam Nov 18, 2024
71ff459
♻️ Improved evaluation module. Added several evaluators.
ferzcam Nov 18, 2024
2d06da0
🎨 Updated ALC dataset tests. (:construction: #71)
ferzcam Nov 18, 2024
a52a3b8
🎨 Minor update to dataset module
ferzcam Nov 18, 2024
0fdc06f
🔧 Updated README, setup.py, docs, etc...
ferzcam Nov 18, 2024
cf098df
🌱 Added missing file
ferzcam Nov 18, 2024
2b89c34
💚 Fixed Github actions scripts
ferzcam Nov 18, 2024
fda4505
Merge branch 'main' into develop
ferzcam Nov 18, 2024
f3b0a4c
⬆️ Updated scipy dependencies
ferzcam Nov 18, 2024
3b21dc3
:fire:
ferzcam Nov 18, 2024
0497b7e
🔧 Updated setup.cfg
ferzcam Nov 18, 2024
d3e2c8c
🔧 Dependencies gensim, scipy and numpy moved to pip instead of conda
ferzcam Nov 18, 2024
6e32dc5
🐛 Updated server url for datasets
ferzcam Nov 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
⚗️ ELEmbeddings PPI Human
  • Loading branch information
ferzcam committed Nov 12, 2024
commit 8ccbaedd2d925e7d40e18fb8a77652955fcbd8fa
81 changes: 22 additions & 59 deletions experiments/elembeddings/ppi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import sys
sys.path.append("../../")
sys.path.append("../")
import mowl
mowl.init_jvm("10g")
@@ -9,6 +8,7 @@
from org.semanticweb.owlapi.model import AxiomType as Ax
from evaluators import PPIEvaluator
from datasets import PPIDataset
from utils import print_as_md
from tqdm import tqdm
from mowl.nn import ELEmModule
import torch as th
@@ -27,8 +27,7 @@
th.autograd.set_detect_anomaly(True)

@ck.command()
@ck.option("--dataset_name", "-ds", type=ck.Choice(["ppi_yeast", "ppi_yeast_slim"]), default="ppi_yeast_slim")
@ck.option("--evaluator_name", "-e", default="ppi", help="Evaluator to use")
@ck.option("--dataset_name", "-ds", type=ck.Choice(["ppi_yeast", "ppi_human"]), default="ppi_yeast")
@ck.option("--embed_dim", "-dim", default=50, help="Embedding dimension")
@ck.option("--batch_size", "-bs", default=300000, help="Batch size")
@ck.option("--module_margin", "-mm", default=0.1, help="Margin for the module")
@@ -40,33 +39,39 @@
@ck.option("--wandb_description", "-desc", default="default")
@ck.option("--no_sweep", "-ns", is_flag=True)
@ck.option("--only_test", "-ot", is_flag=True)
def main(dataset_name, evaluator_name, embed_dim, batch_size,
def main(dataset_name, embed_dim, batch_size,
module_margin, loss_margin, learning_rate, epochs,
evaluate_every, device, wandb_description, no_sweep,
only_test):

seed_everything(42)

evaluator_name = "ppi"

wandb_logger = wandb.init(entity="zhapacfp_team", project="ontoem", group=f"ppi", name=wandb_description)
wandb_logger = wandb.init(entity="zhapacfp_team", project="ontoem", group=f"elembeddings_{dataset_name}", name=wandb_description)


if loss_margin == int(loss_margin):
loss_margin = int(loss_margin)
if module_margin == int(module_margin):
module_margin = int(module_margin)

if no_sweep:
wandb_logger.log({"dataset_name": dataset_name,
"embed_dim": embed_dim,
"module_margin": module_margin,
"learning_rate": learning_rate
})
else:
dataset_name = wandb.config.dataset_name
embed_dim = wandb.config.embed_dim
module_margin = wandb.config.module_margin
learning_rate = wandb.config.learning_rate

root_dir, dataset = dataset_resolver(dataset_name)

model_dir = f"{root_dir}/../models/"
os.makedirs(model_dir, exist_ok=True)

model_filepath = f"{model_dir}/{embed_dim}_{batch_size}_{module_margin}_{loss_margin}_{learning_rate}.pt"
model_filepath = f"{model_dir}/elembeddings_{embed_dim}_{module_margin}_{loss_margin}_{learning_rate}.pt"
model = GeometricELModel(evaluator_name, dataset, batch_size,
embed_dim, module_margin, loss_margin,
learning_rate, model_filepath,
@@ -83,46 +88,19 @@ def main(dataset_name, evaluator_name, embed_dim, batch_size,
wandb_logger.log(metrics)


def print_as_md(overall_metrics):

metrics = ["test_mr", "test_mrr", "test_auc", "test_hits@1", "test_hits@3", "test_hits@10", "test_hits@50", "test_hits@100"]
filt_metrics = [k.replace("_", "_f_") for k in metrics]

string_metrics = "| Property | MR | MRR | AUC | Hits@1 | Hits@3 | Hits@10 | Hits@50 | Hits@100 | \n"
string_metrics += "| --- | --- | --- | --- | --- | --- | --- | --- | --- | \n"
string_filtered_metrics = "| Property | MR | MRR | AUC | Hits@1 | Hits@3 | Hits@10 | Hits@50 | Hits@100 | \n"
string_filtered_metrics += "| --- | --- | --- | --- | --- | --- | --- | --- | --- | \n"

string_metrics += "| Overall | "
string_filtered_metrics += "| Overall | "
for metric in metrics:
if metric == "test_mr":
string_metrics += f"{int(overall_metrics[metric])} | "
else:
string_metrics += f"{overall_metrics[metric]:.4f} | "
for metric in filt_metrics:
if metric == "test_f_mr":
string_filtered_metrics += f"{int(overall_metrics[metric])} | "
else:
string_filtered_metrics += f"{overall_metrics[metric]:.4f} | "


print(string_metrics)
print("\n\n")
print(string_filtered_metrics)




def dataset_resolver(dataset_name):
if dataset_name.lower() == "ppi_yeast":
root_dir = "../use_cases/ppi_yeast/data/"
elif dataset_name.lower() == "ppi_yeast_slim":
root_dir = "../use_cases/ppi_yeast_slim/data/"
organism = "yeast"
elif dataset_name.lower() == "ppi_human":
root_dir = "../use_cases/ppi_human/data/"
organism = "human"
else:
raise ValueError(f"Dataset {dataset_name} not found")

return root_dir, PPIDataset(root_dir)
return root_dir, PPIDataset(root_dir, organism)

def evaluator_resolver(evaluator_name, *args, **kwargs):
if evaluator_name.lower() == "ppi":
@@ -141,8 +119,8 @@ def __init__(self, evaluator_name, dataset,
self.module = ELEmModule(len(self.dataset.classes),
len(self.dataset.object_properties),
len(self.dataset.individuals),
self.embed_dim,
module_margin)
embed_dim=self.embed_dim
)

self.evaluator = evaluator_resolver(evaluator_name, dataset, device)
self.learning_rate = learning_rate
@@ -189,9 +167,9 @@ def train(self):

total_train_loss = 0

loss = 0

for batch_data in main_dl:

loss = 0
batch_data = batch_data.to(self.device)
pos_logits = self.module(batch_data, "gci2").mean()
neg_idxs = th.randint(0, len(protein_ids), (len(batch_data),), device=self.device)
@@ -251,20 +229,5 @@ def test(self):

return self.evaluator.evaluate(self.module)


def test_by_property(self):
self.module.load_state_dict(th.load(self.model_filepath))
self.module.to(self.device)
self.module.eval()
return self.evaluator.evaluate_by_property(self.module)


class DummyLogger():
def __init__(self, *args, **kwargs):
pass

def log(self, *args, **kwargs):
pass

if __name__ == "__main__":
main()