Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: fix ParamFileStore for on-demand frontend imports #4926

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nose_ignores.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
--ignore-file=test_normal_plot_api\.py
--ignore-file=test_on_demand_imports\.py
--ignore-file=test_outputs_pytest\.py
--ignore-file=test_parameter_file_storage\.py
--ignore-file=test_profile_plots\.py
--ignore-file=test_raw_field_slices\.py
--ignore-file=test_registration\.py
Expand Down
1 change: 1 addition & 0 deletions tests/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ other_tests:
- "--ignore-file=test_normal_plot_api\\.py"
- "--ignore-file=test_on_demand_imports\\.py"
- "--ignore-file=test_outputs_pytest\\.py"
- "--ignore-file=test_parameter_file_storage\\.py"
- "--ignore-file=test_profile_plots\\.py"
- "--ignore-file=test_raw_field_slices\\.py"
- "--ignore-file=test_registration\\.py"
Expand Down
26 changes: 24 additions & 2 deletions yt/utilities/parameter_file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,16 @@
parallel_simple_proxy,
)

_field_names = ("hash", "bn", "fp", "tt", "ctid", "class_name", "last_seen")
_field_names = (
"hash",
"bn",
"fp",
"tt",
"ctid",
"class_name",
"module_name",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the potentially backwards incompatible part: when using the on-disk store, this new field will be a new column in the csv. so if an on-disk store already exists, there will be an error due to the different number of columns and the user would need to remove the old on-disk store.

"last_seen",
)


class NoParameterShelf(Exception):
Expand Down Expand Up @@ -82,6 +91,8 @@ def init_db(self):

def _get_db_name(self):
    """Resolve the absolute path of the parameter-file store.

    A path from the config that already names an existing file is used
    as-is (made absolute).  Otherwise the store is placed under
    ``~/.yt/``, falling back to a cwd-relative absolute path when the
    home directory is not writable.
    """
    configured = ytcfg.get("yt", "parameter_file_store")
    home_dir = os.path.expanduser("~/")
    if os.path.isfile(configured) or not os.access(home_dir, os.W_OK):
        return os.path.abspath(configured)
    return os.path.expanduser(f"~/.yt/{configured}")
Expand All @@ -104,6 +115,7 @@ def _adapt_ds(self, ds):
"tt": ds.current_time,
"ctid": ds.unique_identifier,
"class_name": ds.__class__.__name__,
"module_name": ds.__module__,
"last_seen": ds._instantiated,
}

Expand All @@ -114,7 +126,17 @@ def _convert_ds(self, ds_dict):
fn = os.path.join(fp, bn)
class_name = ds_dict["class_name"]
if class_name not in output_type_registry:
raise UnknownDatasetType(class_name)
# first check that the relevant frontend is imported:
# the output_type_registry is refreshed between sessions as
# frontends are imported on-demand now.
import importlib
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason not to put this import at the top level? Importing importlib should be essentially free anyway.


module = ds_dict["module_name"]
fe_api = ".".join(module.split(".")[0:3]) + ".api"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure this is equivalent (I don't know how many "." are expected), but I'm willing to bet there's a clearer way to express this change in file extension.

Suggested change
fe_api = ".".join(module.split(".")[0:3]) + ".api"
fe_api = module.rpartition(".")[0] + ".api"

_ = importlib.import_module(fe_api)
# if it is **still** not in the registry, error
if class_name not in output_type_registry:
raise UnknownDatasetType(class_name)
mylog.info("Checking %s", fn)
if os.path.exists(fn):
ds = output_type_registry[class_name](os.path.join(fp, bn))
Expand Down
55 changes: 55 additions & 0 deletions yt/utilities/tests/test_parameter_file_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os

import pytest

import yt
from yt.config import ytcfg
from yt.utilities.parameter_file_storage import ParameterFileStore


@pytest.fixture
def store_parameter_files():
    # Temporarily switch on parameter-file storage for the duration of a
    # test, then restore whatever value the config held beforehand.
    previous_value = ytcfg.get("yt", "store_parameter_files")
    ytcfg.set("yt", "store_parameter_files", True)
    yield
    ytcfg.set("yt", "store_parameter_files", previous_value)


@pytest.fixture
def set_parameter_file(tmp_path):
    # Point the parameter-file store at an empty CSV inside pytest's
    # per-test temporary directory, restoring the original name afterwards.
    store_file = tmp_path / "param_file_store.csv"
    store_file.touch()
    previous_name = ytcfg.get("yt", "parameter_file_store")
    ytcfg.set("yt", "parameter_file_store", str(store_file))
    yield
    ytcfg.set("yt", "parameter_file_store", previous_name)


@pytest.mark.usefixtures("store_parameter_files", "set_parameter_file")
def test_on_disk_parameter_file_store(monkeypatch):
    """Check that loading a dataset records it in the on-disk store.

    The two setup/teardown fixtures are applied via ``usefixtures`` since
    their return values are never accessed directly.
    """
    pfs = ParameterFileStore()
    # patching _register to True forces a full re-initialization on next
    # instantiation of a ParameterFileStore
    monkeypatch.setattr(pfs, "_register", True)
    monkeypatch.setattr(pfs, "_records", {})

    # to force a re-initialization, set _register to True then get a new
    # instance. This should read from disk.
    pfs = ParameterFileStore()
    pfs_path = ytcfg.get("yt", "parameter_file_store")
    assert os.path.isfile(pfs_path)  # on disk store should now exist
    db_records = pfs.read_db()  # force a read from disk
    assert len(db_records) == 0  # and it should be empty

    # check that ds load is registered on disk
    ds = yt.load_sample("IsolatedGalaxy")
    db_records = pfs.read_db()

    assert len(db_records) == 1
    # ds_hash rather than "hash" so the builtin is not shadowed
    ds_hash = ds._hash()
    assert ds_hash in db_records
    ds_rec = db_records[ds_hash]
    assert ds_rec["bn"] == "galaxy0030"
    assert ds_rec["class_name"] == "EnzoDataset"
    assert ds_rec["module_name"] == "yt.frontends.enzo.data_structures"
Loading