Skip to content

Commit

Permalink
fix downloader for monthly and daily data
Browse files Browse the repository at this point in the history
  • Loading branch information
LisaBock committed Dec 17, 2024
1 parent a6facee commit 7c8eed7
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 122 deletions.
88 changes: 46 additions & 42 deletions esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
Overwrite already downloaded files
"""
if start_date is None:
start_date = datetime(2000, 1, 1)
start_date = datetime(2003, 1, 1)
if end_date is None:
end_date = datetime(2007, 12, 31)
end_date = datetime(2003, 12, 31)
loop_date = start_date

downloader = WGetDownloader(
Expand All @@ -47,82 +47,86 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
base_path_l3c = ('https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/'
'CLD_PRODUCTS/v3.0/L3C/')

# File patterns for daily (L3U) and monthly (L3C) data
files_l3u = [
"*-ESACCI-L3U_CLOUD-CLD_MASKTYPE-AVHRR_*-fv3.0.nc",
"*-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc"
]
files_l3c = ["*-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc"]

wget_options = [
'-r',
'-nH', # Do not create a host-name-prefixed directory
'-e',
'robots=off', # Ignore robots.txt
'--cut-dirs=9',
'-e robots=off', # Ignore robots.txt
'--no-parent', # Don't ascend to the parent directory
'--reject="index.html"', # Reject files named index.html
'--accept=*.nc' # Accept only .nc files
]

while loop_date <= end_date:
year = loop_date.year
month = loop_date.month
date = f'{year}{month:02}'

if int(date) in range(198201, 198601):
sat_am = 'AVHRR-PM/AVHRR_NOAA-7/'
if int(date) in range(198201, 198502):
sat_am = ''
sat_pm = 'AVHRR-PM/AVHRR_NOAA-7/'
elif int(date) in range(198502, 198811):
sat_am = ''
sat_pm = 'AVHRR-PM/AVHRR_NOAA-9/'
elif int(date) in range(198601, 198901):
sat_am = 'AVHRR-PM/AVHRR_NOAA-9/'
elif int(date) in range(198811, 199109):
sat_am = ''
sat_pm = 'AVHRR-PM/AVHRR_NOAA-11/'
elif int(date) in range(199109, 199409):
sat_am = 'AVHRR-AM/AVHRR_NOAA-12/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-11/'
elif int(date) in range(198901, 199501):
sat_am = 'AVHRR-PM/AVHRR_NOAA-11/'
elif int(date) in range(199409, 199502):
sat_am = 'AVHRR-AM/AVHRR_NOAA-12/'
sat_pm = ''
elif int(date) in range(199502, 199901):
sat_am = 'AVHRR-AM/AVHRR_NOAA-12/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-14/'
elif int(date) in range(199501, 200101):
sat_am = 'AVHRR-PM/AVHRR_NOAA-14/'
elif int(date) in range(199901, 200104):
sat_am = 'AVHRR-AM/AVHRR_NOAA-15/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-14/'
elif int(date) in range(200104, 200211):
sat_am = 'AVHRR-AM/AVHRR_NOAA-15/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-16/'
elif int(date) in range(200101, 200501):
elif int(date) in range(200211, 200509):
sat_am = 'AVHRR-AM/AVHRR_NOAA-17/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-16/'
elif int(date) in range(200501, 200701):
elif int(date) in range(200509, 200707):
sat_am = 'AVHRR-AM/AVHRR_NOAA-17/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-18/'
elif int(date) in range(200701, 200901):
elif int(date) in range(200707, 200906):
sat_am = 'AVHRR-AM/AVHRR_METOPA/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-18/'
elif int(date) in range(200901, 201701):
elif int(date) in range(200906, 201701):
sat_am = 'AVHRR-AM/AVHRR_METOPA/'
sat_pm = 'AVHRR-PM/AVHRR_NOAA-19/'
else:
logger.error("Number of instrument is not defined for date %s",
logger.error("Data for this date %s is not available",
date)

# Download daily data from L3U
for sat in (sat_am, sat_pm):
logger.info("Downloading daily data (L3U) for sat = %s", sat)
if sat != '':
folder_l3u = base_path_l3u + sat + f'{year}/{month:02}'
logger.info("Download folder for daily data (L3U): %s",
folder_l3u)
try:
downloader.download_file(folder_l3u, wget_options)
except Exception as e:
logger.error("Failed to download daily data from %s: %s",
folder_l3u, str(e))

# Download monthly data from L3C
for sat in (sat_am, sat_pm):
logger.info("Downloading monthly data (L3C) for sat = %s", sat)
if sat != '':
# monthly data
logger.info("Downloading monthly data (L3C) for sat = %s", sat)
folder_l3c = base_path_l3c + sat + f'{year}/'
wget_options_l3c = wget_options.copy()
wget_options_l3c.append(f'--accept={date}*.nc')
logger.info("Download folder for monthly data (L3C): %s",
folder_l3c)
try:
downloader.download_file(folder_l3c, wget_options)
downloader.download_file(folder_l3c, wget_options_l3c)
except Exception as e:
logger.error("Failed to download monthly data from %s: %s",
folder_l3c, str(e))

# daily data
logger.info("Downloading daily data (L3U) for sat = %s", sat)
folder_l3u = base_path_l3u + sat + f'{year}/{month:02}'
wget_options_l3u = wget_options.copy()
wget_options_l3u.append(f'--accept={date}*CLD_MASKTYPE*.nc,{date}*CLD_PRODUCTS*.nc')
logger.info("Download folder for daily data (L3U): %s",
folder_l3u)
try:
downloader.download_file(folder_l3u, wget_options_l3u)
except Exception as e:
logger.error("Failed to download daily data from %s: %s",
folder_l3u, str(e))

# Increment the loop_date by one month
loop_date += relativedelta.relativedelta(months=1)
80 changes: 0 additions & 80 deletions esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,86 +135,6 @@ def _extract_variable_daily(short_name, var, cfg, in_dir,
unlimited_dimensions=['time'])


# def _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, start_date, end_date):
# """Extract monthly variable with improved handling for multiple cubes."""

# glob_attrs = cfg['attributes']
# cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name)

# if not start_date:
# start_date = datetime(glob_attrs['start_year'], 1, 1)
# if not end_date:
# end_date = datetime(glob_attrs['end_year'], 12, 31)

# for year in range(start_date.year, end_date.year + 1):
# for month in range(1, 13): # Cover all months
# # Search for files for the given year and month
# filelist = glob.glob(os.path.join(in_dir, f"{year}{month:02}" + var['file']))

# if not filelist:
# logger.warning("No monthly file found for %s-%02d", year, month)
# continue

# for ifile in filelist:
# logger.info("CMORizing file %s", ifile)
# try:
# # Attempt to load the cube using a constraint
# constraint = iris.Constraint(var_name=short_name)
# cube = iris.load_cube(ifile, constraint)

# if cube is None:
# logger.warning("Cube could not be loaded for file '%s'", ifile)
# continue # Skip this file and move to the next

# except (ConstraintMismatchError, MergeError) as e:
# logger.warning("Constraint mismatch in file '%s': %s", ifile, e)
# cubes = iris.load(ifile)
# matching_cubes = [c for c in cubes if c.var_name == short_name]

# if not matching_cubes:
# logger.error("No cube found with var_name '%s' in file '%s'", short_name, ifile)
# continue # Skip this file

# if len(matching_cubes) > 1:
# logger.warning(
# "Multiple cubes found with var_name '%s' in file '%s'. Using the first one.",
# short_name, ifile
# )
# cube = matching_cubes[0] # Use the first matching cube

# except Exception as e:
# logger.error("Unexpected error while loading file '%s': %s", ifile, e)
# continue

# try:
# # Fix coordinates
# logger.info("Fixing coordinates for cube '%s'", cube)
# cube = utils.fix_coords(cube)

# # Regrid to target grid
# cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted')

# # Fix units
# if 'raw_units' in var:
# cube.units = var['raw_units']
# cube.convert_units(cmor_info.units)

# # Fix metadata and update global attributes
# attrs = copy.deepcopy(cfg['attributes'])
# attrs['mip'] = var['mip']
# utils.set_global_atts(cube, attrs)

# # Save the processed variable
# utils.save_variable(
# cube,
# short_name,
# out_dir,
# attrs,
# unlimited_dimensions=['time']
# )
# except Exception as e:
# logger.error("Error processing cube for file '%s': %s", ifile, e)


def _extract_variable_monthly(short_name, var, cfg, in_dir, out_dir, start_date, end_date):
"""Extract monthly variable with improved handling for multiple cubes."""
Expand Down

0 comments on commit 7c8eed7

Please sign in to comment.