
Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories.

base repository: sparks-baird/mat_discover
base: bfbd3481c018df88aaeadf5fa7c9edcc6d4fdb41
head repository: sparks-baird/mat_discover
compare: a8ac7171467efa85c4d0d33319996d89fb02e06a
8 changes: 4 additions & 4 deletions .github/workflows/conda-build-pytest.yml
@@ -19,11 +19,11 @@ jobs:
       max-parallel: 5
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
         with:
           submodules: recursive
       - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: '3.8'

@@ -40,7 +40,7 @@ jobs:
         shell: bash -l {0}
         run: |
           conda install git pytest conda-build conda-verify anaconda-client conda-forge::grayskull conda-forge::conda-souschef conda-forge::flit conda-forge::coveralls conda-forge::conda-souschef sgbaird::pqdm sgbaird::elmd
-          pip install ElM2D==0.4.1
+          {{ PYTHON }} -m pip install ElM2D==0.4.1
       - name: Miniconda build
         shell: bash -l {0}
@@ -60,4 +60,4 @@ jobs:
         shell: bash -l {0}
         run: |
           export NUMBA_ENABLE_CUDASIM="1" # i.e. disable GPU
-          python -m pytest
+          {{ PYTHON }} -m pytest
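
The `export NUMBA_ENABLE_CUDASIM="1"` line above routes CUDA kernels through numba's pure-Python simulator so the tests run without a GPU. A minimal sketch of the same switch from within Python (the `add_one` kernel is a made-up example; the variable must be set before numba is first imported):

import os

# Must be set before numba is first imported; "1" sends CUDA kernels
# to numba's pure-Python simulator instead of a real GPU.
os.environ["NUMBA_ENABLE_CUDASIM"] = "1"

import numpy as np
from numba import cuda


@cuda.jit
def add_one(x):
    i = cuda.grid(1)
    if i < x.size:
        x[i] += 1


arr = np.zeros(8)
add_one[1, 8](arr)  # executes on the simulator, not a GPU
assert (arr == 1).all()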
3 changes: 3 additions & 0 deletions .gitignore
@@ -176,3 +176,6 @@ figures/lc_data/UnnamedModel_lc.csv
 examples/supercon.csv
 examples/super_con_mat_discover_pypi.ipynb
 examples/super_con_mat_discover.py
+matbench_kvrh_comp_similarity.csv
+matbench_kvrh_grid_similarity.csv
+matbench_kvrh_total_similarity.csv
6 changes: 3 additions & 3 deletions examples/adaptive_design_compare.py
@@ -122,9 +122,9 @@
 x = list(range(n_iter))
 y = np.zeros((rows, cols, n_repeats + 4, n_iter))
 formula = rows * [cols * [(n_repeats + 4) * [None]]]
-for (col, experiment) in enumerate(experiments):
-    for (row, y_name) in enumerate(y_names):
-        for (page, sub_experiment) in enumerate(experiment):
+for col, experiment in enumerate(experiments):
+    for row, y_name in enumerate(y_names):
+        for page, sub_experiment in enumerate(experiment):
             y[row, col, page] = sub_experiment[y_name].values.tolist()
             formula[row][col][page] = sub_experiment["formula"].values.tolist()
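
The only change in this hunk is dropping the redundant parentheses around the loop targets; bare and parenthesized tuple targets unpack identically, the bare form simply being the conventional lint-friendly style. A minimal sketch of the equivalence:

experiments = ["exp_a", "exp_b"]

# Both loops unpack enumerate's (index, value) pairs the same way;
# only the surface syntax differs.
for (col, experiment) in enumerate(experiments):
    assert isinstance(col, int)

for col, experiment in enumerate(experiments):
    assert isinstance(col, int)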

1 change: 1 addition & 0 deletions examples/bare_bones.py
@@ -129,6 +129,7 @@ def my_mvn(mu_x, mu_y, r):
 
 val_k_neigh_avg = k_neigh_avg_targ[val_ids]
 
+
 # %% 7. Weighted scores
 def weighted_score(pred, proxy, pred_weight=1.0, proxy_weight=1.0):
     """Calculate weighted discovery score using the predicted target and proxy."""
6 changes: 3 additions & 3 deletions examples/crabnet_performance.py
@@ -103,10 +103,10 @@
 x = list(range(n_iter))
 y = np.zeros((rows, cols, n_repeats, n_iter))
 formula = rows * [cols * [n_repeats * [None]]]
-for (col, experiment) in enumerate(experiments):
-    for (row, y_name) in enumerate(y_names):
+for col, experiment in enumerate(experiments):
+    for row, y_name in enumerate(y_names):
         # loop through DataFrame rows: https://stackoverflow.com/a/11617194/13697228
-        for (page, sub_experiment) in enumerate(experiment):
+        for page, sub_experiment in enumerate(experiment):
             y[row, col, page] = sub_experiment[y_name].values.tolist()
             formula[row][col][page] = sub_experiment["formula"].values.tolist()

133 changes: 133 additions & 0 deletions examples/matbench_log_kvrh_gridrdf.py
@@ -0,0 +1,133 @@
"""
An example of how to compute GRID distances directly without writing intermediate files.
This example uses manually created PyMatGen Structure objects, but could be modified to
compute Structures from another source.
NOTE: If the number of Structures gets too large, this could have memory implications.
Modified from source by @sgbaird: https://github.com/CumbyLab/gridrdf/blob/master/examples/direct_GRID_calculation_without_files.py
"""

__author__ = "James Cumby"
__email__ = "james.cumby@ed.ac.uk"

import gridrdf
import os
from matbench.bench import MatbenchBenchmark
import pandas as pd

data_source_loc = os.path.join("examples")

dummy = False

mb = MatbenchBenchmark(autoload=True, subset=["matbench_log_kvrh"])
task = list(mb.tasks)[0]
fold = 0
train_inputs, train_outputs = task.get_train_and_val_data(fold)
test_inputs, test_outputs = task.get_test_data(fold, include_target=True)
if dummy:
train_inputs = train_inputs.head(10)
train_outputs = train_outputs.head(10)
test_inputs = test_inputs.head(5)
test_outputs = test_outputs.head(5)

structures = pd.concat((train_inputs, test_inputs), axis=0).tolist()


# Variables to define cutoffs etc
maximum_grid_distance = 20
bin_size = 0.1
number_of_shells = 100


# Empty list to hold GRID arrays
grid_representations = []


# Calculate all GRIDS iteratively
for structure in structures:
prim_cell_list = list(range(len(structure)))
rdf_atoms = gridrdf.extendRDF.get_rdf_and_atoms(
structure=structure,
prim_cell_list=prim_cell_list,
max_dist=maximum_grid_distance,
)

GRID = gridrdf.extendRDF.rdf_kde(
rdf_atoms=rdf_atoms, max_dist=maximum_grid_distance, bin_size=bin_size
)

assert (
GRID.shape[0] >= number_of_shells
), f"Distance cutoff should be increased so that there are at least {number_of_shells} GRID shells for {structure} (only {GRID.shape[0]} computed)."

grid_representations.append(GRID[:number_of_shells])

# Calculate EMD similarity
# Currently, rdf_similarity_matrix requires a list of dicts, each with a 'task_id' for each structure
# NOTE - this is likely to change in a future release
structure_ids = [
{"task_id": i, "structure": structures[i]} for i in range(len(grid_representations))
]

# Calculate EMD similarity between grids
grid_similarity = gridrdf.earth_mover_distance.rdf_similarity_matrix(
structure_ids, grid_representations, method="emd"
)

grid_similarity = grid_similarity.add(grid_similarity.T, fill_value=0)

# First, convert composition to vector encoding
elem_vectors, elem_symbols = gridrdf.composition.composition_one_hot(
structure_ids, method="percentage"
)

# Now compute EMD similarity based on the "distances" between species contained in `similarity_matrix.csv`.
# This is essentially Pettifor distance, but with non-integer steps defined by data mining of probabilities.
comp_similarity = gridrdf.earth_mover_distance.composition_similarity_matrix(
elem_vectors,
elem_similarity_file=os.path.join(data_source_loc, "similarity_matrix.csv"),
)

comp_similarity = comp_similarity.add(comp_similarity.T, fill_value=0)

total_similarity = 10 * grid_similarity + comp_similarity


# print("\nStructural Similarity:")
# print(grid_similarity)

# print("\nComposition Similarity:")
# print(comp_similarity)

# print("\nTotal similarity (= 10*GRID + Composition):")
# print(total_similarity)

grid_similarity.to_csv("matbench_kvrh_grid_similarity.csv", index=False, header=False)
comp_similarity.to_csv("matbench_kvrh_comp_similarity.csv", index=False, header=False)
total_similarity.to_csv("matbench_kvrh_total_similarity.csv", index=False, header=False)

1 + 1  # no-op; a convenient final line for setting a debugger breakpoint

# %% Code Graveyard

# from pymatgen.core.lattice import Lattice
# from pymatgen.core.structure import Structure

# # Set up dummy pymatgen Structures with different cells/compositions
# dummy_structures = [
# Structure(
# Lattice.from_parameters(a=3.84, b=3.84, c=3.84, alpha=120, beta=90, gamma=60),
# ["Si", "Si"],
# [[0, 0, 0], [0.75, 0.5, 0.75]],
# ),
# Structure(
# Lattice.from_parameters(a=3.84, b=3.84, c=3.84, alpha=120, beta=90, gamma=60),
# ["Ni", "Ni"],
# [[0, 0, 0], [0.75, 0.5, 0.75]],
# ),
# Structure(
# Lattice.from_parameters(a=3.94, b=3.94, c=3.94, alpha=120, beta=90, gamma=60),
# ["Si", "Si"],
# [[0, 0, 0], [0.75, 0.5, 0.75]],
# ),
# ]
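
Since the three matrices are saved with `index=False, header=False`, they read back as raw numeric arrays by passing `header=None`; a minimal sketch (same file names as above):

import numpy as np
import pandas as pd

# Read a similarity matrix written by the script above
# (no header row, no index column).
total_similarity = pd.read_csv(
    "matbench_kvrh_total_similarity.csv", header=None
).to_numpy()

# Each half-matrix was symmetrized via X.add(X.T, fill_value=0),
# so the result should be square and symmetric.
assert total_similarity.shape[0] == total_similarity.shape[1]
assert np.allclose(total_similarity, total_similarity.T)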
1 change: 1 addition & 0 deletions examples/not_precomputed.py
@@ -198,6 +198,7 @@ def my_mvn(mu_x, mu_y, r):
 
 val_k_neigh_avg = k_neigh_avg_targ[val_ids]
 
+
 # %% 7. Weighted scores
 def weighted_score(pred, proxy, pred_weight=1.0, proxy_weight=1.0):
     """Calculate weighted discovery score using the predicted target and proxy."""