diff --git a/docs/index.rst b/docs/index.rst index 168aa7b4..7f4a0bc0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -49,7 +49,7 @@ MONETIO please refer to: users_guide/supported_stats users_guide/time_chunking users_guide/gridded_datasets - + .. toctree:: :hidden: :maxdepth: 4 diff --git a/examples/process_swath_data/control_modis_l2.yaml b/examples/process_swath_data/control_modis_l2.yaml new file mode 100644 index 00000000..df631633 --- /dev/null +++ b/examples/process_swath_data/control_modis_l2.yaml @@ -0,0 +1,57 @@ +analysis: + # 2020 September 9 - September 11 253 - 255 + start_time: '2020-09-09' + end_time: '2020-09-12' + time_interval: '24h' + output_dir: $HOME/Plots + debug: True + regrid: True + target_grid: obs_grid + +obs_grid: + start_time: '2020-09-09' + end_time: '2020-09-12' + ntime: 72 + nlat: 180 + nlon: 360 + +obs: + Terra_MODIS: + # MOD04_L2.AYYYYDDD.HHMM.0XX.timestamp.hdf + debug: False + obs_type: 'sat_swath_clm' + sat_type: 'modis_l2' + filename: $HOME/Data/MODIS/Terra/C61/2020/*/MOD04_L2.*.hdf + variables: + AOD_550_Dark_Target_Deep_Blue_Combined: + minimum: 0.0 + maximum: 10.0 + scale: 0.001 + units: none + + Aqua_MODIS: + # MYD04_L2.AYYYYDDD.HHMM.0XX.timestamp.hdf + debug: False + obs_type: 'sat_swath_clm' + sat_type: 'modis_l2' + filename: $HOME/Data/MODIS/Aqua/C61/2020/*/MYD04_L2.*.hdf + variables: + AOD_550_Dark_Target_Deep_Blue_Combined: + minimum: 0.0 + maximum: 10.0 + scale: 0.001 + units: none + +model: + MERRA2: + mod_type: reanalysis + files: $HOME/Data/MERRA2/tavg1_2d_aer_Nx/*nc4 + regrid: + base_grid: $HOME/Data/Grids/merra2_grid.nc + method: bilinear + mapping: + Terra_MODIS: + TOTEXTTAU: AOD_550_Dark_Target_Deep_Blue_Combined + Aqua_MODIS: + TOTEXTTAU: AOD_550_Dark_Target_Deep_Blue_Combined + diff --git a/examples/process_swath_data/process_modis_l2.py b/examples/process_swath_data/process_modis_l2.py new file mode 100644 index 00000000..38d33d1c --- /dev/null +++ b/examples/process_swath_data/process_modis_l2.py @@ -0,0 +1,31 @@ +from melodies_monet import driver + +an = driver.analysis() +an.control = 'control_modis_l2.yaml' +an.read_control() + +an.open_models() + +an.setup_obs_grid() +# print(an.obs_grid) + +an.setup_regridders() + +for time_interval in an.time_intervals: + + print(time_interval) + + an.open_obs(time_interval=time_interval) + an.update_obs_gridded_data() + +an.normalize_obs_gridded_data() +print(an.obs_gridded_dataset) + +an.obs_gridded_dataset.to_netcdf('MODIS.nc') + +for model in an.models: + print(an.models[model].obj) + regridder = an.model_regridders[model] + print(regridder) + ds_model_regrid = regridder(an.models[model].obj) + diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py index edfc4299..ca7d7920 100644 --- a/melodies_monet/driver.py +++ b/melodies_monet/driver.py @@ -242,7 +242,7 @@ def rename_vars(self): self.obj = self.obj.rename({v:d['rename']}) self.variable_dict[d['rename']] = self.variable_dict.pop(v) - def open_sat_obs(self,time_interval=None): + def open_sat_obs(self, time_interval=None, control_dict=None): """Methods to opens satellite data observations. Uses in-house python code to open and load observations. Alternatively may use the satpy reader. @@ -285,10 +285,15 @@ def open_sat_obs(self,time_interval=None): self.obj = mio.sat._mopitt_l3_mm.open_dataset(self.file, ['column','pressure_surf','apriori_col', 'apriori_surf','apriori_prof','ak_col']) elif self.sat_type == 'modis_l2': - from monetio import modis_l2 + # from monetio import modis_l2 print('Reading MODIS L2') - self.obj = modis_l2.read_mfdataset( - self.file, self.variable_dict, debug=self.debug) + flst = tsub.subset_MODIS_l2(self.file,time_interval) + # self.obj = mio.sat._modis_l2_mm.read_mfdataset( + # self.file, self.variable_dict, debug=self.debug) + self.obj = mio.sat._modis_l2_mm.read_mfdataset( + flst, self.variable_dict, debug=self.debug) + # self.obj = granules, an OrderedDict of Datasets, keyed by datetime_str, + # with variables: Latitude, Longitude, Scan_Start_Time, parameters, ... elif self.sat_type == 'tropomi_l2_no2': #from monetio import tropomi_l2_no2 print('Reading TROPOMI L2 NO2') @@ -300,7 +305,7 @@ def open_sat_obs(self,time_interval=None): except ValueError as e: print('something happened opening file:', e) return - + def filter_obs(self): """Filter observations based on filter_dict. @@ -712,6 +717,11 @@ def __init__(self): self.target_grid = None self.obs_regridders = None self.model_regridders = None + self.obs_grid = None + self.obs_edges = None + self.obs_gridded_data = {} + self.obs_gridded_count = {} + self.obs_gridded_dataset = None def __repr__(self): return ( @@ -880,8 +890,11 @@ def setup_regridders(self): """ from .util import regrid_util if self.regrid: - self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') - self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') + if self.target_grid == 'obs_grid': + self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model', target_grid=self.da_obs_grid) + else: + self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs') + self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model') def open_models(self, time_interval=None,load_files=True): """Open all models listed in the input yaml file and create a :class:`model` @@ -1022,11 +1035,91 @@ def open_obs(self, time_interval=None, load_files=True): if load_files: if o.obs_type in ['sat_swath_sfc', 'sat_swath_clm', 'sat_grid_sfc',\ 'sat_grid_clm', 'sat_swath_prof']: - o.open_sat_obs(time_interval=time_interval) + o.open_sat_obs(time_interval=time_interval, control_dict=self.control_dict) else: o.open_obs(time_interval=time_interval, control_dict=self.control_dict) self.obs[o.label] = o + def setup_obs_grid(self): + """ + Setup a uniform observation grid. + """ + from .util import grid_util + ntime = self.control_dict['obs_grid']['ntime'] + nlat = self.control_dict['obs_grid']['nlat'] + nlon = self.control_dict['obs_grid']['nlon'] + self.obs_grid, self.obs_edges = grid_util.generate_uniform_grid( + self.control_dict['obs_grid']['start_time'], + self.control_dict['obs_grid']['end_time'], + ntime, nlat, nlon) + + self.da_obs_grid = xr.DataArray(dims=['lon', 'lat'], + coords={'lon': self.obs_grid['longitude'], + 'lat': self.obs_grid['latitude']}) + # print(self.da_obs_grid) + + for obs in self.control_dict['obs']: + for var in self.control_dict['obs'][obs]['variables']: + print('initializing gridded data and counts ', obs, var) + self.obs_gridded_data[obs + '_' + var] = np.zeros([ntime, nlon, nlat], dtype=np.float32) + self.obs_gridded_count[obs + '_' + var] = np.zeros([ntime, nlon, nlat], dtype=np.int32) + + def update_obs_gridded_data(self): + from .util import grid_util + """ + Update observation grid cell values and counts, + for all observation datasets and parameters. + """ + for obs in self.obs: + for obs_time in self.obs[obs].obj: + print('updating obs time: ', obs, obs_time) + obs_timestamp = pd.to_datetime( + obs_time, format='%Y%j%H%M').timestamp() + # print(obs_timestamp) + for var in self.obs[obs].obj[obs_time]: + key = obs + '_' + var + print(key) + n_obs = self.obs[obs].obj[obs_time][var].size + grid_util.update_data_grid( + self.obs_edges['time_edges'], + self.obs_edges['lon_edges'], + self.obs_edges['lat_edges'], + np.full(n_obs, obs_timestamp, dtype=np.float32), + self.obs[obs].obj[obs_time].coords['lon'].values.flatten(), + self.obs[obs].obj[obs_time].coords['lat'].values.flatten(), + self.obs[obs].obj[obs_time][var].values.flatten(), + self.obs_gridded_count[key], + self.obs_gridded_data[key]) + + def normalize_obs_gridded_data(self): + from .util import grid_util + """ + Normalize observation grid cell values where counts is not zero. + Create data arrays for the obs_gridded_dataset dictionary. + """ + self.obs_gridded_dataset = xr.Dataset() + + for obs in self.obs: + for var in self.control_dict['obs'][obs]['variables']: + key = obs + '_' + var + print(key) + grid_util.normalize_data_grid( + self.obs_gridded_count[key], + self.obs_gridded_data[key]) + da_data = xr.DataArray( + self.obs_gridded_data[key], + dims=['time', 'lon', 'lat'], + coords={'time': self.obs_grid['time'], + 'lon': self.obs_grid['longitude'], + 'lat': self.obs_grid['latitude']}) + da_count = xr.DataArray( + self.obs_gridded_count[key], + dims=['time', 'lon', 'lat'], + coords={'time': self.obs_grid['time'], + 'lon': self.obs_grid['longitude'], + 'lat': self.obs_grid['latitude']}) + self.obs_gridded_dataset[key + '_data'] = da_data + self.obs_gridded_dataset[key + '_count'] = da_count def pair_data(self, time_interval=None): """Pair all observations and models in the analysis class diff --git a/melodies_monet/util/grid_util.py b/melodies_monet/util/grid_util.py index af1ffd1e..18b05042 100644 --- a/melodies_monet/util/grid_util.py +++ b/melodies_monet/util/grid_util.py @@ -8,6 +8,7 @@ import math import numpy as np +import numba def update_sparse_data_grid(time_edges, x_edges, y_edges, @@ -53,7 +54,7 @@ def update_sparse_data_grid(time_edges, x_edges, y_edges, def normalize_sparse_data_grid(count_grid, data_grid): """ - Normalize accumuxed data on a uniform grid + Normalize accumulated data on a uniform grid Parameters count_grid (dict): number of obs points in grid cell @@ -95,6 +96,7 @@ def sparse_data_to_array(time_edges, x_edges, y_edges, return count_grid_array, data_grid_array +@numba.jit(nopython=True) def update_data_grid(time_edges, x_edges, y_edges, time_obs, x_obs, y_obs, data_obs, count_grid, data_grid): @@ -125,16 +127,30 @@ def update_data_grid(time_edges, x_edges, y_edges, i_time = math.floor((time_obs[i] - time_edges[0]) / time_del) i_x = math.floor((x_obs[i] - x_edges[0]) / x_del) i_y = math.floor((y_obs[i] - y_edges[0]) / y_del) + """ i_time = np.clip(i_time, 0, ntime - 1) i_x = np.clip(i_x, 0, nx - 1) i_y = np.clip(i_y, 0, ny - 1) + """ + if i_time < 0: + i_time = 0 + elif i_time >= ntime: + i_time = ntime - 1 + if i_x < 0: + i_x = 0 + elif i_x >= nx: + i_x = nx - 1 + if i_y < 0: + i_y = 0 + elif i_y >= ny: + i_y = ny - 1 count_grid[i_time, i_x, i_y] += 1 data_grid[i_time, i_x, i_y] += data_obs[i] def normalize_data_grid(count_grid, data_grid): """ - Normalize accumuxed data on a uniform grid + Normalize accumulated data on a uniform grid Parameters count_grid (np.array): number of obs points in grid cell @@ -143,17 +159,15 @@ def normalize_data_grid(count_grid, data_grid): Returns None """ + mask = (count_grid > 0) data_grid[count_grid == 0] = np.nan - data_grid[count_grid > 0] /= count_grid[count_grid > 0] + data_grid[mask] /= count_grid[mask] -def generate_uniform_grid(paired_dims,start,end,obstime,ntime,nlat,nlon): + +def generate_uniform_grid(start, end, ntime, nlat, nlon): import pandas as pd - #import xarray as xr - start_timestamp = pd.to_datetime(start).timestamp() end_timestamp = pd.to_datetime(end).timestamp() - time_stamps = [float(t.timestamp()) for t in obstime] - time_stamps = np.array(time_stamps)[:,None]*np.ones((paired_dims['time'],paired_dims['y'])) ntime = ntime nlat = nlat @@ -168,9 +182,10 @@ def generate_uniform_grid(paired_dims,start,end,obstime,ntime,nlat,nlon): lat_min, lat_max = lat_edges[0:nlat], lat_edges[1:nlat+1] lon_edges = np.linspace(lon0, lon0 + 360, nlon+1, endpoint=True, dtype=float) lon_grid = 0.5 * (lon_edges[0:nlon] + lon_edges[1:nlon+1]) - + grid = {'longitude':lon_grid, - 'latitude':lat_grid, - 'time':time_grid} + 'latitude':lat_grid, + 'time':time_grid} edges = {'time_edges':time_edges,'lon_edges':lon_edges,'lat_edges':lat_edges} - return grid,edges,time_stamps \ No newline at end of file + + return grid, edges diff --git a/melodies_monet/util/regrid_util.py b/melodies_monet/util/regrid_util.py index 1ccd2517..89badc32 100644 --- a/melodies_monet/util/regrid_util.py +++ b/melodies_monet/util/regrid_util.py @@ -9,7 +9,7 @@ import xarray as xr -def setup_regridder(config, config_group='obs'): +def setup_regridder(config, config_group='obs', target_grid=None): """ Setup regridder for observations or model @@ -25,8 +25,14 @@ def setup_regridder(config, config_group='obs'): print('regrid_util: xesmf module not found') raise - target_file = os.path.expandvars(config['analysis']['target_grid']) - ds_target = xr.open_dataset(target_file) + print('setup_regridder.target_grid') + print(target_grid) + + if target_grid is not None: + ds_target = target_grid + else: + target_file = os.path.expandvars(config['analysis']['target_grid']) + ds_target = xr.open_dataset(target_file) regridder_dict = dict() diff --git a/melodies_monet/util/time_interval_subset.py b/melodies_monet/util/time_interval_subset.py index 118465ed..3e671f39 100644 --- a/melodies_monet/util/time_interval_subset.py +++ b/melodies_monet/util/time_interval_subset.py @@ -34,4 +34,23 @@ def subset_OMPS_l2(file_path,timeinterval): for j in fst: interval_files.append(j) return interval_files - + +def subset_MODIS_l2(file_path,timeinterval): + '''Dependent on filenaming convention + MOD04_L2.AYYYYDDD.HHMM.collection.timestamp.hdf + MYD04_L2.AYYYYDDD.HHMM.collection.timestamp.hdf + ''' + import pandas as pd + from glob import glob + import fnmatch + all_files = glob(file_path) + interval_files = [] + subset_interval = pd.date_range(start=timeinterval[0],end=timeinterval[-1],freq='h',inclusive='left') + + for i in subset_interval: + fst = fnmatch.filter(all_files,'*M?D04_L2.A{}*.hdf'.format(i.strftime('%Y%j.%H'))) + fst.sort() + for j in fst: + interval_files.append(j) + return interval_files +