From b9da4446a99c444ae6cec3b5169cc595ad872a6f Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Tue, 17 Dec 2024 11:12:19 -0800 Subject: [PATCH] Add time slicing before `.load()` for performance - Addresses the performance bottleneck caused by attempting to load large datasets into memory. Time slicing reduces the dataset size before it is loaded into memory. --- e3sm_diags/driver/utils/dataset_xr.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py index 94b109e66..bb1d5f68e 100644 --- a/e3sm_diags/driver/utils/dataset_xr.py +++ b/e3sm_diags/driver/utils/dataset_xr.py @@ -1132,15 +1132,16 @@ def _subset_time_series_dataset(self, ds: xr.Dataset, var: str) -> xr.Dataset: ------- xr.Dataset The subsetted time series dataset. - """ - ds_sub = self._subset_vars_and_load(ds, var) - time_slice = self._get_time_slice(ds_sub) - ds_sub = ds_sub.sel(time=time_slice).squeeze() + """ + time_slice = self._get_time_slice(ds) + ds_sub = ds.sel(time=time_slice).squeeze() if self.is_sub_monthly: ds_sub = self._exclude_sub_monthly_coord_spanning_year(ds_sub) + ds_sub = self._subset_vars_and_load(ds_sub, var) + return ds_sub def _get_time_slice(self, ds: xr.Dataset) -> slice: