Skip to content

Commit

Permalink
Feature/multi dates match (#64)
Browse files Browse the repository at this point in the history
* add support for constants

* massive refactoring

* update documentation
  • Loading branch information
b8raoult authored Oct 3, 2024
1 parent 2abc777 commit 3c8a1de
Show file tree
Hide file tree
Showing 36 changed files with 1,720 additions and 1,152 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ repos:
hooks:
- id: ruff
# Next line is for documentation code snippets
exclude: '^[^_].*_\.py$'
exclude: '.*/[^_].*_\.py$'
args:
- --line-length=120
- --fix
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Keep it human-readable, your future self will thank you!
### Added

- Adding the user recipe in the dataset PR #59.
- Add `multi_dates_match` action in create.

### Changed

Expand Down
1 change: 1 addition & 0 deletions docs/building/sources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The following `sources` are currently available:
sources/netcdf
sources/opendap
sources/recentre
sources/repeated_dates
sources/xarray-kerchunk
sources/xarray-zarr
sources/zenodo
25 changes: 25 additions & 0 deletions docs/building/sources/repeated_dates.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
################
repeated_dates
################

The general format of the `repeated_dates` source is:

.. literalinclude:: yaml/repeated_dates1.yaml

**********
constant
**********

.. literalinclude:: yaml/repeated_dates2.yaml

*************
climatology
*************

.. literalinclude:: yaml/repeated_dates3.yaml

*********
closest
*********

.. literalinclude:: yaml/repeated_dates4.yaml
6 changes: 6 additions & 0 deletions docs/building/sources/yaml/repeated_dates1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

repeated_dates:
mode: mode
# ... parameters related to the mode ...
source:
# ... a source definition ...
6 changes: 6 additions & 0 deletions docs/building/sources/yaml/repeated_dates2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
repeated_dates:
mode: constant
source:
xarray-zarr:
url: dem.zarr
variable: dem
8 changes: 8 additions & 0 deletions docs/building/sources/yaml/repeated_dates3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
repeated_dates:
mode: climatology
year: 2019
day: 15
source:
grib:
path: some/path/to/data.grib
param: [some_param]
9 changes: 9 additions & 0 deletions docs/building/sources/yaml/repeated_dates4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
repeated_dates:
mode: closest
frequency: 24h
maximum: 30d
skip_all_nans: true
source:
grib:
path: path/to/data.grib
param: [some_param]
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
.. _index-page:

####################################
Welcome to Anemoi's documentation!
####################################
#############################################
Welcome to `anemoi-datasets` documentation!
#############################################

.. warning::

Expand Down
6 changes: 3 additions & 3 deletions src/anemoi/datasets/create/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
from anemoi.utils.dates import frequency_to_timedelta
from anemoi.utils.humanize import compress_dates
from anemoi.utils.humanize import seconds_to_human
from earthkit.data.core.order import build_remapping

from anemoi.datasets import MissingDateError
from anemoi.datasets import open_dataset
from anemoi.datasets.create.input.trace import enable_trace
from anemoi.datasets.create.persistent import build_storage
from anemoi.datasets.data.misc import as_first_date
from anemoi.datasets.data.misc import as_last_date
Expand Down Expand Up @@ -309,7 +311,6 @@ def create_elements(self, config):


def build_input_(main_config, output_config):
from earthkit.data.core.order import build_remapping

builder = build_input(
main_config.input,
Expand Down Expand Up @@ -563,7 +564,7 @@ def _run(self):
# assert isinstance(group[0], datetime.datetime), type(group[0])
LOG.debug(f"Building data for group {igroup}/{self.n_groups}")

result = self.input.select(dates=group)
result = self.input.select(group_of_dates=group)
assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)

# There are several groups.
Expand Down Expand Up @@ -1031,7 +1032,6 @@ def run(self):

def creator_factory(name, trace=None, **kwargs):
if trace:
from anemoi.datasets.create.trace import enable_trace

enable_trace(trace)

Expand Down
6 changes: 6 additions & 0 deletions src/anemoi/datasets/create/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,15 @@ class StatisticsValueError(ValueError):

def check_data_values(arr, *, name: str, log=[], allow_nans=False):

shape = arr.shape

if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
arr = arr[~np.isnan(arr)]

if arr.size == 0:
warnings.warn(f"Empty array for {name} ({shape})")
return

assert arr.size > 0, (name, *log)

min, max = arr.min(), arr.max()
Expand Down
8 changes: 7 additions & 1 deletion src/anemoi/datasets/create/functions/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@


def _expand(paths):

if not isinstance(paths, list):
paths = [paths]

for path in paths:
if path.startswith("file://"):
path = path[7:]
Expand All @@ -40,8 +44,10 @@ def iterate_patterns(path, dates, **kwargs):
given_paths = path if isinstance(path, list) else [path]

dates = [d.isoformat() for d in dates]
if len(dates) > 0:
kwargs["date"] = dates

for path in given_paths:
paths = Pattern(path, ignore_missing_keys=True).substitute(date=dates, **kwargs)
paths = Pattern(path, ignore_missing_keys=True).substitute(**kwargs)
for path in _expand(paths):
yield path, dates
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ def accumulations(context, dates, **request):
("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
("rr", "oper"): dict(data_accumulation_period=3, base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
}

kwargs = KWARGS.get((class_, stream), {})
Expand Down
2 changes: 1 addition & 1 deletion src/anemoi/datasets/create/functions/sources/grib.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def execute(context, dates, path, latitudes=None, longitudes=None, *args, **kwar
s = s.sel(valid_datetime=dates, **kwargs)
ds = ds + s

if kwargs:
if kwargs and not context.partial_ok:
check(ds, given_paths, valid_datetime=dates, **kwargs)

if geography is not None:
Expand Down
13 changes: 4 additions & 9 deletions src/anemoi/datasets/create/functions/sources/xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
import logging

from earthkit.data.core.fieldlist import MultiFieldList
from earthkit.data.indexing.fieldlist import FieldArray

from anemoi.datasets.data.stores import name_to_zarr_store
from anemoi.datasets.utils.fields import NewMetadataField
from anemoi.datasets.utils.fields import NewMetadataField as NewMetadataField

from .. import iterate_patterns
from .fieldlist import XarrayFieldList
Expand All @@ -31,7 +30,7 @@ def check(what, ds, paths, **kwargs):
raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")


def load_one(emoji, context, dates, dataset, options={}, match_all_dates=False, flavour=None, **kwargs):
def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs):
import xarray as xr

"""
Expand All @@ -52,12 +51,8 @@ def load_one(emoji, context, dates, dataset, options={}, match_all_dates=False,

fs = XarrayFieldList.from_xarray(data, flavour)

if match_all_dates:
match = fs.sel(**kwargs)
result = []
for date in dates:
result.append(FieldArray([NewMetadataField(f, valid_datetime=date) for f in match]))
result = MultiFieldList(result)
if len(dates) == 0:
return fs.sel(**kwargs)
else:
result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])

Expand Down
Loading

0 comments on commit 3c8a1de

Please sign in to comment.