diff --git a/CHANGELOG.md b/CHANGELOG.md index c2dea89f..b2f1395f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Keep it human-readable, your future self will thank you! - Added incremental building of datasets - Add missing dependency for documentation building - Fix failing test due to previous merge +- Bug fix when creating dataset from zarr ### Removed diff --git a/src/anemoi/datasets/create/__init__.py b/src/anemoi/datasets/create/__init__.py index a52e6616..461aad73 100644 --- a/src/anemoi/datasets/create/__init__.py +++ b/src/anemoi/datasets/create/__init__.py @@ -486,17 +486,6 @@ def _run(self): assert chunks == self.dataset.get_zarr_chunks(), (chunks, self.dataset.get_zarr_chunks()) - def sanity_check_config(a, b): - a = json.dumps(a, sort_keys=True, default=str) - b = json.dumps(b, sort_keys=True, default=str) - b = b.replace("T", " ") # dates are expected to be different because - if a != b: - print("❌❌❌ FIXME: Config serialisation to be checked") - print(a) - print(b) - - sanity_check_config(self.main_config, self.dataset.get_main_config()) - # Return the number of groups to process, so we can show a nice progress bar return len(lengths) diff --git a/src/anemoi/datasets/create/functions/sources/xarray/field.py b/src/anemoi/datasets/create/functions/sources/xarray/field.py index cdbd061f..f737e04c 100644 --- a/src/anemoi/datasets/create/functions/sources/xarray/field.py +++ b/src/anemoi/datasets/create/functions/sources/xarray/field.py @@ -7,6 +7,7 @@ # nor does it submit to any jurisdiction. # +import datetime import logging from earthkit.data.core.fieldlist import Field @@ -103,7 +104,12 @@ def longitudes(self): @property def forecast_reference_time(self): - return self.owner.forecast_reference_time + date, time = self.metadata("date", "time") + assert len(time) == 4, time + assert len(date) == 8, date + yyyymmdd = int(date) + time = int(time) // 100 + return datetime.datetime(yyyymmdd // 10000, yyyymmdd // 100 % 100, yyyymmdd % 100, time) def __repr__(self): return repr(self._metadata) diff --git a/src/anemoi/datasets/create/functions/sources/xarray/metadata.py b/src/anemoi/datasets/create/functions/sources/xarray/metadata.py index e98f9ea7..85ca95d9 100644 --- a/src/anemoi/datasets/create/functions/sources/xarray/metadata.py +++ b/src/anemoi/datasets/create/functions/sources/xarray/metadata.py @@ -70,15 +70,17 @@ def as_namespace(self, namespace=None): return self._as_mars() def _as_mars(self): - return dict( - param=self["variable"], - step=self["step"], - levelist=self["level"], - levtype=self["levtype"], - number=self["number"], - date=self["date"], - time=self["time"], - ) + return {} + # p = dict( + # param=self.get("variable", self.get("param")), + # step=self.get("step"), + # levelist=self.get("levelist", self.get("level")), + # levtype=self.get("levtype"), + # number=self.get("number"), + # date=self.get("date"), + # time=self.get("time"), + # ) + # return {k: v for k, v in p.items() if v is not None} def _base_datetime(self): return self._field.forecast_reference_time @@ -135,12 +137,12 @@ def resolution(self): # TODO: implement resolution return None - @property + # @property def mars_grid(self): # TODO: implement mars_grid return None - @property + # @property def mars_area(self): # TODO: code me # return [self.north, self.west, self.south, self.east] diff --git a/src/anemoi/datasets/dates/__init__.py b/src/anemoi/datasets/dates/__init__.py index c487630f..fe1054ee 100644 --- a/src/anemoi/datasets/dates/__init__.py +++ b/src/anemoi/datasets/dates/__init__.py @@ -11,9 +11,9 @@ # from anemoi.utils.dates import as_datetime from anemoi.utils.dates import DateTimes -from anemoi.utils.dates import HindcastDatesTimes from anemoi.utils.dates import as_datetime from anemoi.utils.dates import frequency_to_timedelta +from anemoi.utils.hindcasts import HindcastDatesTimes from anemoi.utils.humanize import print_dates diff --git a/tests/xarray/test_netcdf.py b/tests/xarray/test_netcdf.py index bd1245c6..18c49365 100644 --- a/tests/xarray/test_netcdf.py +++ b/tests/xarray/test_netcdf.py @@ -50,6 +50,8 @@ def skip_test_netcdf(): assert len(fs) == checks["length"], (url, len(fs)) + print(fs[0].datetime()) + if __name__ == "__main__": skip_test_netcdf() diff --git a/tests/xarray/test_opendap.py b/tests/xarray/test_opendap.py index 6ae3981f..6c544b70 100644 --- a/tests/xarray/test_opendap.py +++ b/tests/xarray/test_opendap.py @@ -19,6 +19,8 @@ def test_opendap(): assert len(fs) == 79529 + print(fs[0].datetime()) + if __name__ == "__main__": for name, obj in list(globals().items()): diff --git a/tests/xarray/test_zarr.py b/tests/xarray/test_zarr.py index 509e3afe..7f3acec6 100644 --- a/tests/xarray/test_zarr.py +++ b/tests/xarray/test_zarr.py @@ -10,7 +10,7 @@ from anemoi.datasets.create.functions.sources.xarray import XarrayFieldList -def test_arco_era5(): +def test_arco_era5_1(): ds = xr.open_zarr( "gs://gcp-public-data-arco-era5/ar/1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2", @@ -21,6 +21,25 @@ def test_arco_era5(): fs = XarrayFieldList.from_xarray(ds) print(len(fs)) + print(fs[0].datetime()) + + print(fs[-1].metadata()) + # print(fs[-1].to_numpy()) + + assert len(fs) == 128677526 + + +def test_arco_era5_2(): + + ds = xr.open_zarr( + "gs://gcp-public-data-arco-era5/ar/1959-2022-1h-360x181_equiangular_with_poles_conservative.zarr", + chunks={"time": 48}, + consolidated=True, + ) + + fs = XarrayFieldList.from_xarray(ds) + print(len(fs)) + print(fs[-1].metadata()) # print(fs[-1].to_numpy()) @@ -50,6 +69,8 @@ def test_weatherbench(): assert fs[0].metadata("valid_datetime") == "2020-01-01T06:00:00", fs[0].metadata("valid_datetime") assert fs[-1].metadata("valid_datetime") == "2021-01-10T12:00:00", fs[-1].metadata("valid_datetime") + print(fs[0].datetime()) + def test_inca_one_date(): url = "https://object-store.os-api.cci1.ecmwf.int/ml-tests/test-data/example-inca-one-date.zarr" @@ -65,8 +86,12 @@ def test_inca_one_date(): assert f.metadata("number") == 0 assert f.metadata("variable") == vars[i] + print(fs[0].datetime()) + if __name__ == "__main__": + test_arco_era5_2() + exit() for name, obj in list(globals().items()): if name.startswith("test_") and callable(obj): print(f"Running {name}...")