diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index f9bcfafc3e..d2d85a9ffa 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -337,7 +337,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SOILMOISTURE | sm (Eday, Lmon), smStderr (Eday) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| ESACCI-SST | ts, tsStderr (Amon) | 2 | NCL | +| ESACCI-SST | tos (Omon, Oday), tosStderr (Oday) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-WATERVAPOUR | prw (Amon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml index 49d4731681..1889e387f8 100644 --- a/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-SST.yml @@ -1,29 +1,29 @@ --- # Common global attributes for Cmorizer output -filename: '{year}{month}15_regridded_sst.nc' + attributes: dataset_id: ESACCI-SST - version: '2.2' + version: 3.0-L4-analysis tier: 2 modeling_realm: sat - project_id: OBS - source: 'http://surftemp.net/regridding/index.html' - reference: ["esacci-sst", "esacci-sst-bias-correction"] - comment: "Note that the variable tsStderr is an uncertainty not a standard error." 
+ project_id: OBS6 + source: 'dx.doi.org/10.5285/4a9654136a7148e39b7feb56f8bb02d2' + reference: ["esacci-sst"] -# Variables to cmorize (here use only filename prefix) +# Variables to cmorize (here use only filename ending) variables: - ts: - mip: Amon - raw: sst - file: ESACCI-SST_sat_L4-GHRSST-SSTdepth-OSTIA-GLOB - tsStderr: - mip: Amon - raw: sst_uncertainty - file: ESACCI-SST_sat_L4-GHRSST-SSTdepth-OSTIA-GLOB + tos: + mip: [Oday, Omon] + raw: analysed_sst + frequency: day + filename: ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR3.0-v02.0-fv01.0.nc + start_year: 1980 + end_year: 2021 -# uncomment this part to produce sst cmorized data for ocean realm (Omon, tos) -# tos: -# mip: Omon -# raw: sst -# file: ESACCI-SST_sat_L4-GHRSST-SSTdepth-OSTIA-GLOB + tosStderr: + mip: [Oday] + raw: analysed_sst_uncertainty + frequency: day + filename: ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR3.0-v02.0-fv01.0.nc + start_year: 1980 + end_year: 2021 diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 4c7c168009..3b01cd2e39 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -551,11 +551,11 @@ datasets: ESACCI-SST: tier: 2 - source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sst/data/ - last_access: 2019-02-01 + source: dap.ceda.ac.uk/neodc/eocis/data/global_and_regional/sea_surface_temperature/ + last_access: 2024-12-11 info: | Download the data from: - lt/Analysis/L4/v01.1/ + CDR_v3/Analysis/L4/v3.0.1/ Put all files under a single directory (no subdirectories with years). 
ESACCI-WATERVAPOUR: diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py new file mode 100644 index 0000000000..30c1f6c32d --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py @@ -0,0 +1,56 @@ +"""Script to download ESACCI-SST.""" +import logging + +from datetime import datetime +from dateutil import relativedelta + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, start_date, end_date, + overwrite): + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + if start_date is None: + start_date = datetime(1980, 1, 1) + if end_date is None: + end_date = datetime(2021, 12, 31) + + loop_date = start_date + + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + + path = ("https://dap.ceda.ac.uk/neodc/eocis/data/global_and_regional/" + "sea_surface_temperature/CDR_v3/Analysis/L4/v3.0.1/") + + while loop_date <= end_date: + year = loop_date.year + month = loop_date.strftime("%m") + day = loop_date.strftime("%d") + folder = path + f'{year}/{month}/{day}/' + downloader.download_folder(folder, wget_options=['-e robots=off', + '--no-parent', + '--accept=nc']) + loop_date += relativedelta.relativedelta(days=1) diff --git a/esmvaltool/cmorizers/data/downloaders/ftp.py b/esmvaltool/cmorizers/data/downloaders/ftp.py index 9f0cd5e8f9..7f9d4e6492 100644 --- a/esmvaltool/cmorizers/data/downloaders/ftp.py +++ b/esmvaltool/cmorizers/data/downloaders/ftp.py @@ -35,16 
+35,22 @@ class FTPDownloader(BaseDownloader): overwrite : bool Overwrite already downloaded files """ - def __init__(self, config, server, dataset, dataset_info, overwrite): + def __init__(self, config, server, dataset, dataset_info, overwrite, + user=None, passwd=None): super().__init__(config, dataset, dataset_info, overwrite) self._client = None self.server = server + self.user = user + self.passwd = passwd def connect(self): """Connect to the FTP server.""" self._client = ftplib.FTP(self.server) logger.info(self._client.getwelcome()) - self._client.login() + if self.user is None: + self._client.login() + else: + self._client.login(user=self.user, passwd=self.passwd) def set_cwd(self, path): """Set current working directory in the remote. diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py index c009b96ffb..6ff6e97d99 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_sst.py @@ -4,42 +4,30 @@ Tier 2: other freely-available dataset. Source - http://surftemp.net/regridding/index.html + https://catalogue.ceda.ac.uk/uuid/4a9654136a7148e39b7feb56f8bb02d2/ Last access - 20201214 - -Download and processing instructions - Download the following files: - Go to http://surftemp.net/regridding/index.html - and request regridded data with the following options: - Time Resolution: monthly - Longitude Resolution: 0.5 - Latitude Resolution: 0.5 - Start Date: 1982-01-01 - End Date: 2019-12-31 - Exclude data above sea ice threshold: True - (Threshold: 100 %) - Include post-hoc SST bias adjustments: True - Output Absolute or Anomaly SST: absolute - Generate Sea Ice Fraction: True - Error Correlation in Time (Days): 7 - Error Correlation In Space (Degrees): 3.0 - -Modification history - 20201204-roberts_charles: written. - 20201214-predoi_valeriu: approved. - 20201214-lauer_axel: approved. 
+ 20241211 + +#Download and processing instructions +# A downloader is provided by ESMValTool. +# (esmvaltool/cmorizers/data/downloaders/datasets/esacci_sst.py) + """ +import copy +import glob import logging import os import iris +from datetime import datetime +from esmvalcore.cmor.fixes import get_time_bounds +from esmvalcore.preprocessor import regrid +from esmvaltool.cmorizers.data import utilities as utils from esmvalcore.preprocessor import concatenate from ...utilities import ( - convert_timeunits, fix_coords, fix_var_metadata, save_variable, @@ -49,49 +37,126 @@ logger = logging.getLogger(__name__) -def extract_variable(var_info, raw_info, attrs, year): +def extract_variable(raw_info): """Extract to all vars.""" rawvar = raw_info['name'] constraint = iris.NameConstraint(var_name=rawvar) - try: - cube = iris.load_cube(raw_info['file'], constraint) - except iris.exceptions.ConstraintMismatchError as constraint_error: - raise ValueError(f"No data available for variable {rawvar}" - f"and year {year}") from constraint_error - - # Fix cube - fix_var_metadata(cube, var_info) - convert_timeunits(cube, year) - cube = fix_coords(cube) + if rawvar == 'analysed_sst_uncertainty': + tmp_cube = iris.load_cube(raw_info['file'], + iris.NameConstraint(var_name='analysed_sst')) + ancillary_var = tmp_cube.ancillary_variable('sea_water_temperature' + ' standard_error') + cube = tmp_cube.copy(ancillary_var.core_data()) + else: + try: + cube = iris.load_cube(raw_info['file'], constraint) + except iris.exceptions.ConstraintMismatchError as constraint_error: + raise ValueError(f"No data available for variable {rawvar} in file" + f" {raw_info['file']}") from constraint_error + + # Remove ancillary data + for ancillary_variable in cube.ancillary_variables(): + cube.remove_ancillary_variable(ancillary_variable) + return cube + + +def get_monthly_cube(cfg, var, vals, raw_info, attrs, + inpfile_pattern, year, month): + data_cubes = [] + month_inpfile_pattern = inpfile_pattern.format( +
year=str(year)+"{:02}".format(month)) + logger.info("Pattern: %s", month_inpfile_pattern) + inpfiles = sorted(glob.glob(month_inpfile_pattern)) + if inpfiles == []: + logger.error("Could not find any files with this" + " pattern %s", month_inpfile_pattern) + raise ValueError + logger.info("Found input files: %s", inpfiles) + + for inpfile in inpfiles: + raw_info['file'] = inpfile + logger.info("CMORizing var %s from file type %s", var, + raw_info['file']) + data_cubes.append(extract_variable(raw_info)) + + cube = concatenate(data_cubes) + + # regridding from 0.05x0.05 to 0.5x0.5 (not for uncertainty field) + if 'Stderr' not in var: + cube = regrid(cube, target_grid='0.5x0.5', scheme='area_weighted') + + # Fix dtype + utils.fix_dtype(cube) + # Fix units + cmor_info = cfg['cmor_table'].get_variable(vals['mip'][0], var) + cube.convert_units(cmor_info.units) + # Fix metadata + fix_var_metadata(cube, cmor_info) + # Fix coordinates + fix_coords(cube) + cube.coord('time').long_name = 'time' + cube.coord('latitude').long_name = 'latitude' + cube.coord('longitude').long_name = 'longitude' + # Fix time bounds + time = cube.coord('time') + time.bounds = get_time_bounds(time, vals['frequency']) + + # set global attributes set_global_atts(cube, attrs) + # add comment to tosStderr + if var == 'tosStderr': + cube.attributes['comment'] = ('Note that the variable tsStderr is an ' + 'uncertainty not a standard error.') + return cube def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmor_table = cfg['cmor_table'] - glob_attrs = cfg['attributes'] + glob_attrs = copy.deepcopy(cfg['attributes']) # run the cmorization for var, vals in cfg['variables'].items(): - var_info = cmor_table.get_variable(vals['mip'], var) - glob_attrs['mip'] = vals['mip'] - raw_info = {'name': vals['raw'], 'file': vals['file']} - inpfile = os.path.join(in_dir, cfg['filename']) - logger.info("CMORizing var %s from file type %s", var, inpfile) - years =
range(1982, 2020) - months = ["0" + str(mo) for mo in range(1, 10)] + ["10", "11", "12"] - for year in years: - monthly_cubes = [] - for month in months: - raw_info['file'] = inpfile.format(year=year, month=month) - logger.info("CMORizing var %s from file type %s", var, - raw_info['file']) - cube = extract_variable(var_info, raw_info, glob_attrs, year) - monthly_cubes.append(cube) - yearly_cube = concatenate(monthly_cubes) - save_variable(yearly_cube, - var, - out_dir, - glob_attrs, - unlimited_dimensions=['time']) + if not start_date: + start_date = datetime(vals['start_year'], 1, 1) + if not end_date: + end_date = datetime(vals['end_year'], 12, 31) + raw_info = {'name': vals['raw']} + inpfile_pattern = os.path.join(in_dir, '{year}*' + vals['filename']) + logger.info("CMORizing var %s from file type %s", var, inpfile_pattern) + mon_cubes = [] + for year in range(start_date.year, end_date.year + 1): + logger.info("Processing year %s", year) + glob_attrs['mip'] = vals['mip'][0] + for month in range(start_date.month, end_date.month + 1): + monthly_cube = get_monthly_cube(cfg, var, vals, raw_info, + glob_attrs, inpfile_pattern, + year, month) + # Save daily data + save_variable(monthly_cube, + var, + out_dir, + glob_attrs, + unlimited_dimensions=['time']) + # Calculate monthly mean + if 'Stderr' not in var: + logger.info("Calculating monthly mean") + iris.coord_categorisation.add_month_number(monthly_cube, + 'time') + iris.coord_categorisation.add_year(monthly_cube, 'time') + monthly_cube = monthly_cube.aggregated_by( + ['month_number', 'year'], + iris.analysis.MEAN) + monthly_cube.remove_coord('month_number') + monthly_cube.remove_coord('year') + mon_cubes.append(monthly_cube) + # Save monthly data + if 'Stderr' not in var: + yearly_cube = concatenate(mon_cubes) + glob_attrs['mip'] = vals['mip'][1] + save_variable(yearly_cube, + var, + out_dir, + glob_attrs, + unlimited_dimensions=['time']) + mon_cubes.clear() diff --git 
a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 880aef831a..67a00b83f5 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -327,12 +327,14 @@ diagnostics: ESACCI-SST: description: ESACCI-SST check variables: - ts: - tsStderr: + tos: + additional_datasets: + - {dataset: ESACCI-SST, project: OBS6, mip: Omon, tier: 2, + type: sat, version: 3.0-L4-analysis, timerange: '198001/202112'} + tosStderr: additional_datasets: - - {dataset: ESACCI-SST, project: OBS, mip: Amon, tier: 2, - type: sat, version: 2.2, - start_year: 1982, end_year: 2019} + - {dataset: ESACCI-SST, project: OBS6, mip: Oday, tier: 2, + type: sat, version: 3.0-L4-analysis, timerange: '202001/202112'} scripts: null diff --git a/esmvaltool/references/esacci-sst.bibtex b/esmvaltool/references/esacci-sst.bibtex index 30eafc7756..278e41966e 100644 --- a/esmvaltool/references/esacci-sst.bibtex +++ b/esmvaltool/references/esacci-sst.bibtex @@ -1,13 +1,12 @@ @article{esacci-sst, - doi = {10.1038/s41597-019-0236-x}, - url = {https://doi.org/10.1038/s41597-019-0236-x}, - year = 2019, - month = {oct}, - publisher = {Springer Nature}, - volume = {6}, - number = {1}, - pages = {223}, - author = {Christopher J. Merchant and Owen Embury and Claire E. Bulgin and Thomas Block and Gary K. Corlett and Emma Fiedler and Simon A. Good and Jonathan Mittaz and Nick A. Rayner and David Berry and Steinar Eastwood and Michael Taylor and Yoko Tsushima and Alison Waterfall and Ruth Wilson and Craig Donlon}, - title = {Satellite-based time-series of sea-surface temperature since 1981 for climate applications ({SST} {CCI})}, - journal = {Scientific Data} + doi = {10.1038/s41597-024-03147-w}, + url = {https://doi.org/10.1038/s41597-024-03147-w}, + year = 2024, + journal = {Sci Data}, + volume = {11}, + number = {1}, + pages = {326}, + publisher = {{Nature Publishing Group}}, + author = {Embury, O. 
and Merchant, C.J. and Good, S.A. and Rayner, N.A. and Høyer, J.L. and Atkinson, C. and Block, T. and Alerskans, E. and Pearson, K.J. and Worsfold, M. and McCarroll, N. and Donlon, C.}, + title = {Satellite-based time-series of sea-surface temperature since 1980 for climate applications}, }