diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py index 905f413349..3c41832876 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py @@ -29,9 +29,9 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, Overwrite already downloaded files """ if start_date is None: - start_date = datetime(2000, 1, 1) + start_date = datetime(2003, 1, 1) if end_date is None: - end_date = datetime(2007, 12, 31) + end_date = datetime(2003, 12, 31) loop_date = start_date downloader = WGetDownloader( @@ -47,22 +47,11 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, base_path_l3c = ('https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/' 'CLD_PRODUCTS/v3.0/L3C/') - # File patterns for daily (L3U) and monthly (L3C) data - files_l3u = [ - "*-ESACCI-L3U_CLOUD-CLD_MASKTYPE-AVHRR_*-fv3.0.nc", - "*-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc" - ] - files_l3c = ["*-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc"] - wget_options = [ '-r', - '-nH', # Disable the creation of directory structure - '-e', - 'robots=off', # Ignore robots.txt - '--cut-dirs=9', + '-e robots=off', # Ignore robots.txt '--no-parent', # Don't ascend to the parent directory '--reject="index.html"', # Reject any HTML files - '--accept=*.nc' # Accept only .nc files ] while loop_date <= end_date: @@ -70,59 +59,74 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, month = loop_date.month date = f'{year}{month:02}' - if int(date) in range(198201, 198601): - sat_am = 'AVHRR-PM/AVHRR_NOAA-7/' + if int(date) in range(198201, 198502): + sat_am = '' + sat_pm = 'AVHRR-PM/AVHRR_NOAA-7/' + elif int(date) in range(198502, 198811): + sat_am = '' sat_pm = 'AVHRR-PM/AVHRR_NOAA-9/' - elif int(date) in range(198601, 198901): - sat_am = 'AVHRR-PM/AVHRR_NOAA-9/' + elif int(date) in range(198811, 199109): + sat_am = '' + sat_pm = 'AVHRR-PM/AVHRR_NOAA-11/' + elif int(date) in range(199109, 199409): + sat_am = 'AVHRR-AM/AVHRR_NOAA-12/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-11/' - elif int(date) in range(198901, 199501): - sat_am = 'AVHRR-PM/AVHRR_NOAA-11/' + elif int(date) in range(199409, 199502): + sat_am = 'AVHRR-AM/AVHRR_NOAA-12/' + sat_pm = '' + elif int(date) in range(199502, 199901): + sat_am = 'AVHRR-AM/AVHRR_NOAA-12/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-14/' - elif int(date) in range(199501, 200101): - sat_am = 'AVHRR-PM/AVHRR_NOAA-14/' + elif int(date) in range(199901, 200104): + sat_am = 'AVHRR-AM/AVHRR_NOAA-15/' + sat_pm = 'AVHRR-PM/AVHRR_NOAA-14/' + elif int(date) in range(200104, 200211): + sat_am = 'AVHRR-AM/AVHRR_NOAA-15/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-16/' - elif int(date) in range(200101, 200501): + elif int(date) in range(200211, 200509): sat_am = 'AVHRR-AM/AVHRR_NOAA-17/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-16/' - elif int(date) in range(200501, 200701): + elif int(date) in range(200509, 200707): sat_am = 'AVHRR-AM/AVHRR_NOAA-17/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-18/' - elif int(date) in range(200701, 200901): + elif int(date) in range(200707, 200906): sat_am = 'AVHRR-AM/AVHRR_METOPA/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-18/' - elif int(date) in range(200901, 201701): + elif int(date) in range(200906, 201701): sat_am = 'AVHRR-AM/AVHRR_METOPA/' sat_pm = 'AVHRR-PM/AVHRR_NOAA-19/' else: - logger.error("Number of instrument is not defined for date %s", + logger.error("Data for this date %s is not available", date) - # Download daily data from L3U - for sat in (sat_am, sat_pm): - logger.info("Downloading daily data (L3U) for sat = %s", sat) - if sat != '': - folder_l3u = base_path_l3u + sat + f'{year}/{month:02}' - logger.info("Download folder for daily data (L3U): %s", - folder_l3u) - try: - downloader.download_file(folder_l3u, wget_options) - except Exception as e: - logger.error("Failed to download daily data from %s: %s", - folder_l3u, str(e)) - # Download monthly data from L3C for sat in (sat_am, sat_pm): - logger.info("Downloading monthly data (L3C) for sat = %s", sat) if sat != '': + # monthly data + logger.info("Downloading monthly data (L3C) for sat = %s", sat) folder_l3c = base_path_l3c + sat + f'{year}/' + wget_options_l3c = wget_options.copy() + wget_options_l3c.append(f'--accept={date}*.nc') logger.info("Download folder for monthly data (L3C): %s", folder_l3c) try: - downloader.download_file(folder_l3c, wget_options) + downloader.download_file(folder_l3c, wget_options_l3c) except Exception as e: logger.error("Failed to download monthly data from %s: %s", folder_l3c, str(e)) + # daily data + logger.info("Downloading daily data (L3U) for sat = %s", sat) + folder_l3u = base_path_l3u + sat + f'{year}/{month:02}' + wget_options_l3u = wget_options.copy() + wget_options_l3u.append(f'--accept={date}*CLD_MASKTYPE*.nc,{date}*CLD_PRODUCTS*.nc') + logger.info("Download folder for daily data (L3U): %s", + folder_l3u) + try: + downloader.download_file(folder_l3u, wget_options_l3u) + except Exception as e: + logger.error("Failed to download daily data from %s: %s", + folder_l3u, str(e)) + # Increment the loop_date by one month loop_date += relativedelta.relativedelta(months=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py index 197f65944a..864c087ced 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py @@ -135,86 +135,6 @@ def _extract_variable_daily(short_name, var, cfg, in_dir, unlimited_dimensions=['time']) -# def _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, start_date, end_date): -# """Extract monthly variable with improved handling for multiple cubes.""" - -# glob_attrs = cfg['attributes'] -# cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) - -# if not start_date: -# start_date = datetime(glob_attrs['start_year'], 1, 1) -# if not end_date: -# end_date = datetime(glob_attrs['end_year'], 12, 31) - -# for year in range(start_date.year, end_date.year + 1): -# for month in range(1, 13): # Cover all months -# # Search for files for the given year and month -# filelist = glob.glob(os.path.join(in_dir, f"{year}{month:02}" + var['file'])) - -# if not filelist: -# logger.warning("No monthly file found for %s-%02d", year, month) -# continue - -# for ifile in filelist: -# logger.info("CMORizing file %s", ifile) -# try: -# # Attempt to load the cube using a constraint -# constraint = iris.Constraint(var_name=short_name) -# cube = iris.load_cube(ifile, constraint) - -# if cube is None: -# logger.warning("Cube could not be loaded for file '%s'", ifile) -# continue # Skip this file and move to the next - -# except (ConstraintMismatchError, MergeError) as e: -# logger.warning("Constraint mismatch in file '%s': %s", ifile, e) -# cubes = iris.load(ifile) -# matching_cubes = [c for c in cubes if c.var_name == short_name] - -# if not matching_cubes: -# logger.error("No cube found with var_name '%s' in file '%s'", short_name, ifile) -# continue # Skip this file - -# if len(matching_cubes) > 1: -# logger.warning( -# "Multiple cubes found with var_name '%s' in file '%s'. Using the first one.", -# short_name, ifile -# ) -# cube = matching_cubes[0] # Use the first matching cube - -# except Exception as e: -# logger.error("Unexpected error while loading file '%s': %s", ifile, e) -# continue - -# try: -# # Fix coordinates -# logger.info("Fixing coordinates for cube '%s'", cube) -# cube = utils.fix_coords(cube) - -# # Regrid to target grid -# cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') - -# # Fix units -# if 'raw_units' in var: -# cube.units = var['raw_units'] -# cube.convert_units(cmor_info.units) - -# # Fix metadata and update global attributes -# attrs = copy.deepcopy(cfg['attributes']) -# attrs['mip'] = var['mip'] -# utils.set_global_atts(cube, attrs) - -# # Save the processed variable -# utils.save_variable( -# cube, -# short_name, -# out_dir, -# attrs, -# unlimited_dimensions=['time'] -# ) -# except Exception as e: -# logger.error("Error processing cube for file '%s': %s", ifile, e) - def _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, start_date, end_date): """Extract monthly variable with improved handling for multiple cubes."""