From a6faceeda4c8956ba9bf122673424af977d28c26 Mon Sep 17 00:00:00 2001 From: Diego Cammarano Date: Mon, 16 Dec 2024 17:21:27 +0100 Subject: [PATCH] fix formatter, edit config file --- .../data/cmor_config/ESACCI-CLOUD.yml | 126 ++++---- .../data/formatters/datasets/esacci_cloud.py | 272 +++++++++++++++--- 2 files changed, 302 insertions(+), 96 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml index 604e912f08..594ca9df23 100644 --- a/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml @@ -1,4 +1,4 @@ -# CMORIZE ESA CCI CLOUD daily data +# CMORIZE ESA CCI CLOUD daily/monthly data --- # Common global attributes for Cmorizer output @@ -11,65 +11,85 @@ attributes: modeling_realm: sat reference: 'esacci_cloud' comment: '' - start_year: 2003 - end_year: 2007 + # start_year: 2003 + # end_year: 2007 + start_year: 1982 + end_year: 1982 # Variables to cmorize variables: - clt: - mip: day - raw: [cmask_desc, cmask_asc] - raw_units: '1' - file: '-ESACCI-L3U_CLOUD-CLD_MASKTYPE-AVHRR_*-fv3.0.nc' - clwvi: - mip: CFday - raw: [cwp_desc, cwp_asc] - raw_units: g/m2 - file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - ctp: - mip: day - raw: [ctp_desc, ctp_asc] - raw_units: hPa - file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - reff: - mip: day - raw: [cer_desc, cer_asc] - raw_units: um - file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - cod: - mip: AERday - raw: [cot_desc, cot_asc] - raw_units: 1 - file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' # clt: - # mip: + # mip: day # raw: [cmask_desc, cmask_asc] # raw_units: '1' # file: '-ESACCI-L3U_CLOUD-CLD_MASKTYPE-AVHRR_*-fv3.0.nc' + # clwvi: + # mip: CFday + # raw: [cwp_desc, cwp_asc] + # raw_units: g/m2 + # file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # ctp: + # mip: day + # raw: [ctp_desc, ctp_asc] + # raw_units: hPa + # file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # reff: + # mip: day + # raw: [cer_desc, cer_asc] + # raw_units: um + # file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # Monthly data + # clt: + # mip: Amon + # raw: cfc + # raw_units: '1' + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # lwp: + # mip: Amon + # raw_units: g/m2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' clwvi: - mip: mon - raw: [cwp_desc, cwp_asc] + mip: Amon + raw: iwp_allsky raw_units: g/m2 file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - ctp: - mip: day - raw: [ctp_desc, ctp_asc] - raw_units: hPa - file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - reff: - mip: mon - raw: [cer_desc, cer_asc] - raw_units: um - file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - cod: - mip: mon - raw: [cot_desc, cot_asc] - raw_units: 1 - file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' - - #rlut: - # mip: day - # raw: toa_lwup_desc - # raw_units: W/m2 - # file: '-ESACCI-L3U_CLOUD-RAD_PRODUCTS-AVHRR_NOAA-*-fv3.0.nc' - + # rlut: + # mip: Amon + # raw: toa_lwup + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rlutcs: + # mip: Amon + # raw: toa_lwup_clr + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rsut: + # mip: Amon + # raw: toa_swup + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rsutcs: + # mip: Amon + # raw: toa_swup_clr + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rsdt: + # mip: Amon + # raw: toa_swdn + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rlus: + # mip: Amon + # raw: boa_lwup + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rsus: + # mip: Amon + # raw: boa_swup + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + # rsuscs: + # mip: Amon + # raw: boa_swup_clr + # raw_units: W m-2 + # file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py index b542a4609e..197f65944a 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py @@ -24,6 +24,7 @@ from cf_units import Unit from dask import array as da from iris import NameConstraint +from iris.exceptions import ConstraintMismatchError, MergeError from calendar import monthrange from datetime import datetime @@ -32,27 +33,25 @@ logger = logging.getLogger(__name__) - def _create_nan_cube(cube, year, month, day): - """Create cube containing only NaN values from an existing cube.""" + """Create cube containing only NaN from existing cube.""" nan_cube = cube.copy() nan_cube.data = da.ma.masked_greater(cube.core_data(), -1e20) # Read dataset time unit and calendar from file dataset_time_unit = str(nan_cube.coord('time').units) - dataset_time_calendar = nan_cube.coord('time').units.calendar - - # Convert datetime to numeric time - new_time = cf_units.date2num(datetime(year=year, month=month, day=day), - dataset_time_unit, dataset_time_calendar) - nan_cube.coord('time').points = float(new_time) + 0.025390625 + dataset_time_calender = nan_cube.coord('time').units.calendar + # Convert datetime + newtime = datetime(year=year, month=month, day=day) + newtime = cf_units.date2num(newtime, dataset_time_unit, + dataset_time_calender) + nan_cube.coord('time').points = float(newtime) + 0.025390625 return nan_cube - -def _extract_variable(short_name, var, cfg, in_dir, out_dir, start_date, - end_date): - """Extract and process a variable.""" +def _extract_variable_daily(short_name, var, cfg, in_dir, + out_dir, start_date, end_date): + """Extract daily variable.""" fill_cube = None glob_attrs = cfg['attributes'] @@ -64,53 +63,58 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir, start_date, end_date = datetime(glob_attrs['end_year'], 12, 31) for year in range(start_date.year, end_date.year + 1): - for month in range(1, 13): + for month in range(start_date.month, end_date.month + 1): cubes = iris.cube.CubeList() num_days = monthrange(year, month)[1] + for iday in range(1, num_days + 1): + + filelist = glob.glob(os.path.join(in_dir, f'{year}{month:02}' + f'{iday:02}' + var['file'])) - for day in range(1, num_days + 1): - file_pattern = f"{year}{month:02}{day:02}" + var['file'] - file_list = glob.glob(os.path.join(in_dir, file_pattern)) + if filelist: - if file_list: - for file_index, file_path in enumerate(file_list): - logger.info("Processing file: %s", file_path) + for inum, ifile in enumerate(filelist): + logger.info("CMORizing file %s", ifile) - raw_var = var.get('raw', [short_name]) + # load data + raw_var = var.get('raw', short_name) - for var_index, raw_name in enumerate(raw_var): - daily_cube = iris.load_cube( - file_path, NameConstraint(var_name=raw_name)) + for ivar, raw_name in enumerate(raw_var): + daily_cube = iris.load_cube(ifile, NameConstraint(var_name=raw_name)) - # Adjust time - daily_cube.coord('time').points += ( - file_index + 0.5 * var_index) * 0.1 + # set arbitrary time of day + daily_cube.coord('time').points = (daily_cube.coord('time').points + + (inum + 0.5 * ivar) * 0.1) daily_cube.attributes.clear() daily_cube.coord('time').long_name = 'time' - # Fix coordinates, dtype, and metadata + # Fix coordinates daily_cube = utils.fix_coords(daily_cube) + #Fix dtype utils.fix_dtype(daily_cube) + #Fix metadata utils.fix_var_metadata(daily_cube, cmor_info) if fill_cube is None: fill_cube = daily_cube cubes.append(daily_cube) + else: - logger.info("Filling missing day: %s-%s-%s", - year, month, day) - daily_cube = _create_nan_cube(fill_cube, year, month, day) + + logger.info("Fill missing day %s in month %s and year %s", iday, month, year) + daily_cube = _create_nan_cube(fill_cube, year, month, iday) cubes.append(daily_cube) - # Combine cubes and process cube = cubes.concatenate_cube() + + # regridding from 0.05x0.05 to 0.5x0.5 cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') - logger.info("Calculating daily statistics") + logger.info("Building daily means") + # Calc daily cube = daily_statistics(cube) - # Adjust units + # Fix units if short_name == 'clt': cube.data = 100 * cube.core_data() else: @@ -118,20 +122,202 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir, start_date, cube.units = var['raw_units'] cube.convert_units(cmor_info.units) - # Finalize metadata + # Fix metadata and update version information attrs = copy.deepcopy(cfg['attributes']) attrs['mip'] = var['mip'] utils.set_global_atts(cube, attrs) - # Save the variable - utils.save_variable(cube, short_name, out_dir, attrs, + # Save variable + utils.save_variable(cube, + short_name, + out_dir, + attrs, unlimited_dimensions=['time']) -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date=None, - end_date=None): - """Main cmorization function.""" - for short_name, var in cfg['variables'].items(): - logger.info("CMORizing variable: %s", short_name) - _extract_variable(short_name, var, cfg, in_dir, out_dir, - start_date, end_date) +# def _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, start_date, end_date): +# """Extract monthly variable with improved handling for multiple cubes.""" + +# glob_attrs = cfg['attributes'] +# cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) + +# if not start_date: +# start_date = datetime(glob_attrs['start_year'], 1, 1) +# if not end_date: +# end_date = datetime(glob_attrs['end_year'], 12, 31) + +# for year in range(start_date.year, end_date.year + 1): +# for month in range(1, 13): # Cover all months +# # Search for files for the given year and month +# filelist = glob.glob(os.path.join(in_dir, f"{year}{month:02}" + var['file'])) + +# if not filelist: +# logger.warning("No monthly file found for %s-%02d", year, month) +# continue + +# for ifile in filelist: +# logger.info("CMORizing file %s", ifile) +# try: +# # Attempt to load the cube using a constraint +# constraint = iris.Constraint(var_name=short_name) +# cube = iris.load_cube(ifile, constraint) + +# if cube is None: +# logger.warning("Cube could not be loaded for file '%s'", ifile) +# continue # Skip this file and move to the next + +# except (ConstraintMismatchError, MergeError) as e: +# logger.warning("Constraint mismatch in file '%s': %s", ifile, e) +# cubes = iris.load(ifile) +# matching_cubes = [c for c in cubes if c.var_name == short_name] + +# if not matching_cubes: +# logger.error("No cube found with var_name '%s' in file '%s'", short_name, ifile) +# continue # Skip this file + +# if len(matching_cubes) > 1: +# logger.warning( +# "Multiple cubes found with var_name '%s' in file '%s'. Using the first one.", +# short_name, ifile +# ) +# cube = matching_cubes[0] # Use the first matching cube + +# except Exception as e: +# logger.error("Unexpected error while loading file '%s': %s", ifile, e) +# continue + +# try: +# # Fix coordinates +# logger.info("Fixing coordinates for cube '%s'", cube) +# cube = utils.fix_coords(cube) + +# # Regrid to target grid +# cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') + +# # Fix units +# if 'raw_units' in var: +# cube.units = var['raw_units'] +# cube.convert_units(cmor_info.units) + +# # Fix metadata and update global attributes +# attrs = copy.deepcopy(cfg['attributes']) +# attrs['mip'] = var['mip'] +# utils.set_global_atts(cube, attrs) + +# # Save the processed variable +# utils.save_variable( +# cube, +# short_name, +# out_dir, +# attrs, +# unlimited_dimensions=['time'] +# ) +# except Exception as e: +# logger.error("Error processing cube for file '%s': %s", ifile, e) + + +def _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, start_date, end_date): + """Extract monthly variable with improved handling for multiple cubes.""" + + glob_attrs = cfg['attributes'] + cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) + + if not start_date: + start_date = datetime(glob_attrs['start_year'], 1, 1) + if not end_date: + end_date = datetime(glob_attrs['end_year'], 12, 31) + + for year in range(start_date.year, end_date.year + 1): + for month in range(1, 13): # Loop through all months (1-12) + # Construct the file list for the current month + filelist = glob.glob(os.path.join(in_dir, f"{year}{month:02}" + var['file'])) + + if not filelist: + logger.warning("No monthly file found for %s-%02d", year, month) + continue + + cubes = iris.cube.CubeList() + + for ifile in filelist: + logger.info("CMORizing file %s", ifile) + + try: + # Extract raw names from the variable dictionary, like in the daily function + raw_var = var.get('raw', short_name) + for ivar, raw_name in enumerate(raw_var): + # Try to load the cube using a constraint based on the raw_name + cube = iris.load_cube(ifile, NameConstraint(var_name=raw_name)) + + if cube is None: + logger.warning("Cube could not be loaded for file '%s'", ifile) + continue # Skip this file and move to the next + + # Set an arbitrary time of day for the monthly data (similar to daily) + cube.coord('time').points = (cube.coord('time').points + (ivar + 0.5) * 0.1) + cube.attributes.clear() + cube.coord('time').long_name = 'time' + + # Fix coordinates + cube = utils.fix_coords(cube) + + # Fix data type + utils.fix_dtype(cube) + + # Fix metadata + utils.fix_var_metadata(cube, cmor_info) + + # Add the cube to the list + cubes.append(cube) + + except Exception as e: + logger.error("Error processing file '%s': %s", ifile, e) + + # After gathering all cubes for the month, concatenate them + if cubes: + cube = cubes.concatenate_cube() + + # Regrid the cube to the target grid (e.g., 0.5x0.5) + cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') + + # Fix units and handle any special cases like 'clt' + if short_name == 'clt': + cube.data = 100 * cube.core_data() # Example conversion + else: + if 'raw_units' in var: + cube.units = var['raw_units'] + cube.convert_units(cmor_info.units) + + # Set global attributes and fix metadata + attrs = copy.deepcopy(cfg['attributes']) + attrs['mip'] = var['mip'] + utils.set_global_atts(cube, attrs) + + # Save the processed variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'] + ) + + else: + logger.warning("No valid cubes processed for %s-%02d", year, month) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """CMORization function call.""" + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + if 'L3U' in var['file']: + _extract_variable_daily(short_name, var, cfg, in_dir, out_dir, + start_date, end_date) + elif 'L3C' in var['file']: + _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, + start_date, end_date) + + + + +