Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ppdebreuck/modnet
Browse files Browse the repository at this point in the history
  • Loading branch information
gbrunin committed Apr 8, 2024
2 parents 6afc37f + f0e2e99 commit fdb1301
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 26 deletions.
2 changes: 1 addition & 1 deletion modnet/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.2"
__version__ = "0.4.3"
21 changes: 17 additions & 4 deletions modnet/featurizers/featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ def _fit_apply_featurizers(
_featurizers.set_n_jobs(self._n_jobs)

return _featurizers.featurize_dataframe(
df, column, multiindex=True, ignore_errors=True
df,
column,
multiindex=True,
ignore_errors=getattr(self, "ignore_errors", True),
)
elif mode == "single":

Expand All @@ -164,7 +167,10 @@ def _fit_apply_featurizers(
)
start = time.monotonic_ns()
df = featurizer.featurize_dataframe(
df, column, multiindex=True, ignore_errors=True
df,
column,
multiindex=True,
ignore_errors=getattr(self, "ignore_errors", True),
)
LOG.info(
f"Applied featurizer {featurizer.__class__.__name__} to column {column!r} in {(time.monotonic_ns() - start) * 1e-9} seconds"
Expand Down Expand Up @@ -244,7 +250,11 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
else:
df = CompositionToOxidComposition(
max_sites=-1 if getattr(self, "continuous_only", False) else None
).featurize_dataframe(df, col_id=col_comp, ignore_errors=True)
).featurize_dataframe(
df,
col_id=col_comp,
ignore_errors=getattr(self, "ignore_errors", True),
)
df = self._fit_apply_featurizers(
df,
self.oxid_composition_featurizers,
Expand Down Expand Up @@ -311,7 +321,10 @@ def featurize_site(
fingerprint, stats=self.site_stats
)
df = site_stats_fingerprint.featurize_dataframe(
df, "Input data|structure", multiindex=False, ignore_errors=True
df,
"Input data|structure",
multiindex=False,
ignore_errors=getattr(self, "ignore_errors", True),
)

if aliases:
Expand Down
2 changes: 1 addition & 1 deletion modnet/models/bayesian.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
try:
import tensorflow_probability as tfp
except ImportError:
raise RuntimeError(
raise ImportError(
"`tensorflow-probability` is required for Bayesian models: install modnet[bayesian]."
)

Expand Down
4 changes: 3 additions & 1 deletion modnet/models/vanilla.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,9 @@ def evaluate(
f"Loss {loss} not recognized. Use mae, mse or a callable."
)
else:
score.append(loss(y_true, y_pred[i]))
pass

score.append(loss(y_true, y_pred[i]))

return np.mean(score)

Expand Down
21 changes: 20 additions & 1 deletion modnet/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import pytest
from pathlib import Path
from modnet.preprocessing import CompositionContainer

from modnet.utils import get_hash_of_file
from pymatgen.core import Structure


_TEST_DATA_HASHES = {
Expand Down Expand Up @@ -41,7 +43,24 @@ def _load_moddata(filename):
# what it was when created
assert get_hash_of_file(data_file) == _TEST_DATA_HASHES[filename]

return MODData.load(data_file)
moddata = MODData.load(data_file)
# For forward compatibility with pymatgen, we have to patch our old test data to have the following attributes
# to allow for unpickling.
# This is hopefully only a temporary solution; in future, we should serialize pymatgen objects
# with Monty's `as_dict`/`from_dict` to avoid having to hack this private interface.
for ind, s in enumerate(moddata.structures):
if isinstance(s, Structure):
# assume all previous data was periodic
moddata.structures[ind].lattice._pbc = [True, True, True]
for jnd, site in enumerate(s.sites):
# assume all of our previous data had ordered sites
moddata.structures[ind].sites[jnd].label = str(next(iter(site.species)))
# required for the global structure.is_ordered to work
moddata.structures[ind].sites[jnd].species._n_atoms = 1.0
elif isinstance(s, CompositionContainer):
moddata.structures[ind].composition._n_atoms = s.composition._natoms

return moddata


@pytest.fixture(scope="function")
Expand Down
13 changes: 13 additions & 0 deletions modnet/tests/test_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python
import pytest
import numpy as np


def test_train_small_model_single_target(subset_moddata, tf_session):
Expand All @@ -21,6 +22,7 @@ def test_train_small_model_single_target(subset_moddata, tf_session):

model.fit(data, epochs=2)
model.predict(data)
assert not np.isnan(model.evaluate(data))


def test_train_small_model_single_target_classif(subset_moddata, tf_session):
Expand Down Expand Up @@ -49,6 +51,7 @@ def is_metal(egap):
)

model.fit(data, epochs=2)
assert not np.isnan(model.evaluate(data))


def test_train_small_model_multi_target(subset_moddata, tf_session):
Expand All @@ -70,6 +73,7 @@ def test_train_small_model_multi_target(subset_moddata, tf_session):

model.fit(data, epochs=2)
model.predict(data)
assert not np.isnan(model.evaluate(data))


def test_train_small_model_presets(subset_moddata, tf_session):
Expand Down Expand Up @@ -109,6 +113,7 @@ def test_train_small_model_presets(subset_moddata, tf_session):
models = results[0]
assert len(models) == len(modified_presets)
assert len(models[0]) == num_nested
assert not np.isnan(model.evaluate(data))


def test_model_integration(subset_moddata, tf_session):
Expand All @@ -134,6 +139,7 @@ def test_model_integration(subset_moddata, tf_session):
loaded_model = MODNetModel.load("test")

assert model.predict(data).equals(loaded_model.predict(data))
assert not np.isnan(model.evaluate(data))


def test_train_small_bayesian_single_target(subset_moddata, tf_session):
Expand All @@ -156,6 +162,7 @@ def test_train_small_bayesian_single_target(subset_moddata, tf_session):
model.fit(data, epochs=2)
model.predict(data)
model.predict(data, return_unc=True)
assert not np.isnan(model.evaluate(data))


def test_train_small_bayesian_single_target_classif(subset_moddata, tf_session):
Expand Down Expand Up @@ -186,6 +193,7 @@ def is_metal(egap):
model.fit(data, epochs=2)
model.predict(data)
model.predict(data, return_unc=True)
assert not np.isnan(model.evaluate(data))


def test_train_small_bayesian_multi_target(subset_moddata, tf_session):
Expand All @@ -208,6 +216,7 @@ def test_train_small_bayesian_multi_target(subset_moddata, tf_session):
model.fit(data, epochs=2)
model.predict(data)
model.predict(data, return_unc=True)
assert not np.isnan(model.evaluate(data))


def test_train_small_bootstrap_single_target(subset_moddata, tf_session):
Expand All @@ -232,6 +241,7 @@ def test_train_small_bootstrap_single_target(subset_moddata, tf_session):
model.fit(data, epochs=2)
model.predict(data)
model.predict(data, return_unc=True)
assert not np.isnan(model.evaluate(data))


def test_train_small_bootstrap_single_target_classif(small_moddata, tf_session):
Expand Down Expand Up @@ -264,6 +274,7 @@ def is_metal(egap):
model.fit(data, epochs=2)
model.predict(data)
model.predict(data, return_unc=True)
assert not np.isnan(model.evaluate(data))


def test_train_small_bootstrap_multi_target(small_moddata, tf_session):
Expand Down Expand Up @@ -333,3 +344,5 @@ def test_train_small_bootstrap_presets(small_moddata, tf_session):
models = results[0]
assert len(models) == len(modified_presets)
assert len(models[0]) == num_nested

assert not np.isnan(model.evaluate(data))
23 changes: 7 additions & 16 deletions modnet/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,14 @@ def check_column_values(new: MODData, reference: MODData, tolerance=0.03):
Allows for some columns to be checked more loosely (see inline comment below).
"""
new_cols = set(new.df_featurized.columns)
old_cols = set(reference.df_featurized.columns)

# Check that the new df only adds new columns and is not missing anything
assert not (old_cols - new_cols)

error_cols = set()
for col in new.df_featurized.columns:
for col in old_cols:
if not (
np.absolute(
(
Expand Down Expand Up @@ -349,14 +355,6 @@ def test_small_moddata_featurization(small_moddata_2023, featurizer_mode):
featurizer.featurizer_mode = featurizer_mode
new = MODData(structures, targets, target_names=names, featurizer=featurizer)
new.featurize(fast=False, n_jobs=1)

new_cols = sorted(new.df_featurized.columns.tolist())
old_cols = sorted(old.df_featurized.columns.tolist())

for i in range(len(old_cols)):
assert new_cols[i] == old_cols[i]

np.testing.assert_array_equal(old_cols, new_cols)
check_column_values(new, old, tolerance=0.03)


Expand All @@ -376,13 +374,6 @@ def test_small_moddata_composition_featurization(
new = MODData(materials=compositions, featurizer=featurizer)
new.featurize(fast=False, n_jobs=1)

new_cols = sorted(new.df_featurized.columns.tolist())
ref_cols = sorted(reference.df_featurized.columns.tolist())

for i in range(len(ref_cols)):
# print(new_cols[i], ref_cols[i])
assert new_cols[i] == ref_cols[i]

# assert relative error below 3 percent
check_column_values(new, reference, tolerance=0.03)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pandas==1.5.2
scikit-learn==1.3.2
matminer==0.9.2
numpy>=1.25
pymatgen==2023.11.12
pymatgen==2024.3.1
scikit-learn==1.3.2
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
packages=setuptools.find_packages(),
install_requires=[
"pandas~=1.5",
"tensorflow~=2.10",
"tensorflow~=2.10,<2.12",
"pymatgen>=2023",
"matminer~=0.9",
"numpy>=1.24",
Expand Down

0 comments on commit fdb1301

Please sign in to comment.