Skip to content

Commit

Permalink
Add time slicing before .load() for performance
Browse files Browse the repository at this point in the history
- Addresses a performance bottleneck caused by loading large datasets into memory. The time slice is now applied before `.load()` is called, so only the selected time range is loaded into memory.
  • Loading branch information
tomvothecoder committed Dec 17, 2024
1 parent 09e853c commit b9da444
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions e3sm_diags/driver/utils/dataset_xr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,15 +1132,16 @@ def _subset_time_series_dataset(self, ds: xr.Dataset, var: str) -> xr.Dataset:
-------
xr.Dataset
The subsetted time series dataset.
"""
ds_sub = self._subset_vars_and_load(ds, var)
time_slice = self._get_time_slice(ds_sub)
ds_sub = ds_sub.sel(time=time_slice).squeeze()
"""
time_slice = self._get_time_slice(ds)
ds_sub = ds.sel(time=time_slice).squeeze()

if self.is_sub_monthly:
ds_sub = self._exclude_sub_monthly_coord_spanning_year(ds_sub)

ds_sub = self._subset_vars_and_load(ds_sub, var)

return ds_sub

def _get_time_slice(self, ds: xr.Dataset) -> slice:
Expand Down

0 comments on commit b9da444

Please sign in to comment.