From 5333f6ec603c52da7dcc15aeab9cd6fa0e0e5889 Mon Sep 17 00:00:00 2001 From: Julien Lenhardt Date: Tue, 17 Dec 2024 17:33:06 +0100 Subject: [PATCH 1/3] Additional files for new NOAA GML surface flask measurements datasets --- doc/sphinx/source/input.rst | 6 + .../NOAA-GML-SURFACE-FLASK-CH4.yml | 25 ++ .../NOAA-GML-SURFACE-FLASK-CO2.yml | 25 ++ .../NOAA-GML-SURFACE-FLASK-N2O.yml | 25 ++ esmvaltool/cmorizers/data/datasets.yml | 24 ++ .../datasets/noaa_gml_surface_flask_ch4.py | 38 ++ .../datasets/noaa_gml_surface_flask_co2.py | 38 ++ .../datasets/noaa_gml_surface_flask_n2o.py | 38 ++ .../datasets/noaa_gml_surface_flask.py | 340 ++++++++++++++++++ .../datasets/noaa_gml_surface_flask_ch4.py | 340 ++++++++++++++++++ .../datasets/noaa_gml_surface_flask_co2.py | 340 ++++++++++++++++++ .../datasets/noaa_gml_surface_flask_n2o.py | 340 ++++++++++++++++++ .../recipes/examples/recipe_check_obs.yml | 27 ++ .../noaa-gml-surface-flask-ch4.bibtex | 8 + .../noaa-gml-surface-flask-co2.bibtex | 8 + .../noaa-gml-surface-flask-n2o.bibtex | 8 + 16 files changed, 1630 insertions(+) create mode 100644 esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml create mode 100644 esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml create mode 100644 esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py create mode 100644 
esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py create mode 100644 esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex create mode 100644 esmvaltool/references/noaa-gml-surface-flask-co2.bibtex create mode 100644 esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index f9bcfafc3e..c9237e0c0b 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -437,6 +437,12 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NOAA-MBL-CH4 | ch4s (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| NOAA-GML-SURFACE-FLASK-CH4 | ch4s (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| NOAA-GML-SURFACE-FLASK-CO2 | co2s (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| NOAA-GML-SURFACE-FLASK-N2O | n2os (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NOAAGlobalTemp | tasa (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NSIDC-0116-[nh|sh] [#note4]_ | usi, vsi (day) | 3 | Python | diff --git a/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml 
b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml new file mode 100644 index 0000000000..a2da13c2ce --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml @@ -0,0 +1,25 @@ +--- +# Filename +filename: 'ch4_surface-flask_ccgg_text.tar.gz' + +# Trace gas +trace_gas: ch4 + +# Common global attributes for Cmorizer output +attributes: + dataset_id: NOAA-GML-SURFACE-FLASK-CH4 + version: '1.0' + tier: 2 + modeling_realm: atmos + project_id: OBS6 + source: 'https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz' + reference: 'noaa-gml-surface-flask-ch4' + +# Variables to cmorize +variables: + ch4s: + mip: Amon + raw_name: ch4s + raw_units: 'mol mol-1' + standard_name: mole_fraction_of_methane_in_air + long_name: 'Mole Fraction of CH4' diff --git a/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml new file mode 100644 index 0000000000..cb95e1711f --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml @@ -0,0 +1,25 @@ +--- +# Filename +filename: 'co2_surface-flask_ccgg_text.tar.gz' + +# Trace gas +trace_gas: co2 + +# Common global attributes for Cmorizer output +attributes: + dataset_id: NOAA-GML-SURFACE-FLASK-CO2 + version: '1.0' + tier: 2 + modeling_realm: atmos + project_id: OBS6 + source: 'https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz' + reference: 'noaa-gml-surface-flask-co2' + +# Variables to cmorize +variables: + co2s: + mip: Amon + raw_name: co2s + raw_units: 'micromol mol-1' + standard_name: mole_fraction_of_carbon_dioxide_in_air + long_name: 'Mole Fraction of CO2' diff --git a/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml new file mode 100644 index 0000000000..beca6ea1da --- /dev/null +++ 
b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml @@ -0,0 +1,25 @@ +--- +# Filename +filename: 'n2o_surface-flask_ccgg_text.tar.gz' + +# Trace gas +trace_gas: n2o + +# Common global attributes for Cmorizer output +attributes: + dataset_id: NOAA-GML-SURFACE-FLASK-N2O + version: '1.0' + tier: 2 + modeling_realm: atmos + project_id: OBS6 + source: 'https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz' + reference: 'noaa-gml-surface-flask-n2o' + +# Variables to cmorize +variables: + n2os: + mip: Amon + raw_name: n2os + raw_units: 'mol mol-1' + standard_name: mole_fraction_of_nitrous_oxide_in_air + long_name: 'Mole Fraction of N2O' diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 4c7c168009..42603afb4b 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1103,6 +1103,30 @@ datasets: Download the following files: ersst.v5.yyyymm.nc for years 1854 onwards + + NOAA-GML-SURFACE-FLASK-CH4: + tier: 2 + source: https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ + last_access: 2024-07-30 + info: | + Download the following archive: + ch4_surface-flask_ccgg_text.tar.gz + + NOAA-GML-SURFACE-FLASK-CO2: + tier: 2 + source: https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/ + last_access: 2024-07-30 + info: | + Download the following archive: + co2_surface-flask_ccgg_text.tar.gz + + NOAA-GML-SURFACE-FLASK-N2O: + tier: 2 + source: https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/ + last_access: 2024-07-30 + info: | + Download the following archive: + n2o_surface-flask_ccgg_text.tar.gz NOAAGlobalTemp: tier: 2 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py new file mode 100644 index 0000000000..506052a668 --- /dev/null +++ 
b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py @@ -0,0 +1,38 @@ +"""Script to download NOAA Global Monitoring Lab surface flask data +for CH4 from NOAA's archive.""" +import logging + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, + start_date, end_date, overwrite): + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + downloader.download_file( + "https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz", + wget_options=[], + ) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py new file mode 100644 index 0000000000..dc90249ecf --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py @@ -0,0 +1,38 @@ +"""Script to download NOAA Global Monitoring Lab surface flask data +for CO2 from NOAA's archive.""" +import logging + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, + start_date, end_date, overwrite): + """Download dataset. 
+ + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + downloader.download_file( + "https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz", + wget_options=[], + ) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py new file mode 100644 index 0000000000..53c1565e2e --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py @@ -0,0 +1,38 @@ +"""Script to download NOAA Global Monitoring Lab surface flask data +for N2O from NOAA's archive.""" +import logging + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, + start_date, end_date, overwrite): + """Download dataset. 
+ + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + downloader.download_file( + "https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz", + wget_options=[], + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py new file mode 100644 index 0000000000..8cc3d735a7 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask data. + +Tier + Tier 2: freely available dataset. 
+ +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download one of the following file: + https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz + https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz + https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz +""" + +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} +TRACE_GAS_UNITS = {'ch4s': '1e-09', 'co2s': '1e-06', 'n2os': '1e-09'} + +class FlaskStation(NamedTuple): + """NOAA GML surface flask station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskStations(NamedTuple): + """NOAA GML surface flask station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as 
file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check if site location is available otherwise return None + if np.any(np.isnan([site_latitude, 
site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskCO2Station object + station = FlaskStation( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskStations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskStations(**columns) + + +def assemble_cube(stations, idx, var_attrs): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskStations + Station data + idx : int + Unique ids of all stations + var_attrs : dictionnary + Contains attributes related to the trace gas + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." 
+ ) + + trace_gas = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(trace_gas.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( + points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(trace_gas, da.isnan(trace_gas), fill_value=-999.999), + standard_name=(var_attrs['standard_name']), + long_name=var_attrs['long_name'], + var_name=var_attrs['raw_name'], + units=TRACE_GAS_UNITS[var_attrs['raw_name']], # var_attrs['raw_units'], + dim_coords_and_dims=[ + 
(time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 1), + (longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist, var_attrs): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx, var_attrs) + return cube + + +def cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob( + f'{cfg["trace_gas"]}_surface-flask_ccgg_text/{cfg["trace_gas"]}_*_month.txt') + filelist = tar_file_system.glob( + f'{cfg["trace_gas"]}_surface-flask_ccgg_text/{cfg["trace_gas"]}_*.txt') + + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + "All station datasets in tar file must have same version." 
+ ) + version = versions[0] + + var_attrs = cfg['variables'][f'{cfg["trace_gas"]}s'] + cube = build_cube(tar_file_system, paths, filelist, var_attrs) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py new file mode 100644 index 0000000000..7c5f2a848a --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask CH4 data. + +Tier + Tier 2: freely available dataset. 
+ +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download the following file: + https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz +""" + +from esmvaltool.cmorizers.data.formatters.datasets.noaa_gml_surface_flask import cmorization_noaa_gml_surface_flask_trace_gas + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + + +''' +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} + +class FlaskCH4Station(NamedTuple): + """NOAA GML surface flask CH4 station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskCH4Stations(NamedTuple): + """NOAA GML surface flask CH4 station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask CH4 station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + 
skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check 
if site location is available otherwise return None + if np.any(np.isnan([site_latitude, site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskCH4Station object + station = FlaskCH4Station( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskCH4Stations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskCH4Stations(**columns) + + +def assemble_cube(stations, idx): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskCH4Stations + Station data + idx : int + Unique ids of all stations + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." 
+ ) + + ch4 = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(ch4.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( + points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(ch4, da.isnan(ch4), fill_value=-999.999), + standard_name=( + "mole_fraction_of_methane_in_air"), + long_name="Mole Fraction of CH4", + var_name="ch4s", + units="mol mol-1", + dim_coords_and_dims=[ + (time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 1), + 
(longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx) + return cube + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*_month.txt") + filelist = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*.txt") + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + "All station datasets in tar file must have same version." 
+ ) + version = versions[0] + cube = build_cube(tar_file_system, paths, filelist) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) +''' \ No newline at end of file diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py new file mode 100644 index 0000000000..179698d26a --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask CO2 data. + +Tier + Tier 2: freely available dataset. + +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download the following file: + https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz +""" + +from esmvaltool.cmorizers.data.formatters.datasets.noaa_gml_surface_flask import cmorization_noaa_gml_surface_flask_trace_gas + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + + +''' +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + 
+FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} + +class FlaskCO2Station(NamedTuple): + """NOAA GML surface flask CO2 station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskCO2Stations(NamedTuple): + """NOAA GML surface flask CO2 station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask CO2 station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = 
np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check if site location is available otherwise return None + if np.any(np.isnan([site_latitude, site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskCO2Station object + station = FlaskCO2Station( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskCO2Stations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskCO2Stations(**columns) + + +def 
assemble_cube(stations, idx): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskCO2Stations + Station data + idx : int + Unique ids of all stations + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." + ) + + co2 = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(co2.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( 
+ points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(co2, da.isnan(co2), fill_value=-999.999), + standard_name=( + "mole_fraction_of_carbon_dioxide_in_air"), + long_name="Mole Fraction of CO2", + var_name="co2s", + units="micromol mol-1", + dim_coords_and_dims=[ + (time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 1), + (longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx) + return cube + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*_month.txt") + filelist = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*.txt") + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + 
"All station datasets in tar file must have same version." + ) + version = versions[0] + cube = build_cube(tar_file_system, paths, filelist) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) +''' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py new file mode 100644 index 0000000000..e849d46a7e --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask N2O data. + +Tier + Tier 2: freely available dataset. 
+ +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download the following file: + https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz +""" + +from esmvaltool.cmorizers.data.formatters.datasets.noaa_gml_surface_flask import cmorization_noaa_gml_surface_flask_trace_gas + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + + +''' +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} + +class FlaskN2OStation(NamedTuple): + """NOAA GML surface flask N2O station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskN2OStations(NamedTuple): + """NOAA GML surface flask N2O station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask N2O station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + 
skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check 
if site location is available otherwise return None + if np.any(np.isnan([site_latitude, site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskN2OStation object + station = FlaskN2OStation( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskN2OStations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskN2OStations(**columns) + + +def assemble_cube(stations, idx): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskN2OStations + Station data + idx : int + Unique ids of all stations + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." 
+ ) + + n2o = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(n2o.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( + points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(n2o, da.isnan(n2o), fill_value=-999.999), + standard_name=( + "mole_fraction_of_nitrous_oxide_in_air"), + long_name="Mole Fraction of CO2", + var_name="n2os", + units="mol mol-1", + dim_coords_and_dims=[ + (time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 
1), + (longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx) + return cube + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*_month.txt") + filelist = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*.txt") + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + "All station datasets in tar file must have same version." 
+ ) + version = versions[0] + cube = build_cube(tar_file_system, paths, filelist) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) +''' \ No newline at end of file diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 880aef831a..e03af2610e 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -754,6 +754,33 @@ diagnostics: type: reanaly, version: v3b, start_year: 1854, end_year: 2019} scripts: null + NOAA-GML-SURFACE-FLASK-CH4: + description: NOAA Global Monitoring Lab Surface flask CH4 data check + variables: + ch4s: + additional_datasets: + - {dataset: NOAA-GML-SURFACE-FLASK-CH4, project: OBS6, mip: Amon, type: atmos, version: 1.0, tier: 2, + start_year: 1983, end_year: 2023} + scripts: null + + NOAA-GML-SURFACE-FLASK-CO2: + description: NOAA Global Monitoring Lab Surface flask CO2 data check + variables: + co2s: + additional_datasets: + - {dataset: NOAA-GML-SURFACE-FLASK-CO2, project: OBS6, mip: Amon, type: atmos, version: 1.0, tier: 2, + start_year: 1968, end_year: 2023} + scripts: null + + NOAA-GML-SURFACE-FLASK-N2O: + description: NOAA Global Monitoring Lab Surface flask N2O data check + variables: + n2os: + additional_datasets: + - {dataset: NOAA-GML-SURFACE-FLASK-N2O, project: OBS6, mip: Amon, type: atmos, version: 1.0, tier: 2, + start_year: 1997, end_year: 2023} + scripts: null + NOAA-MBL-CH4: description: NOAA marine boundary layer CH4 check variables: diff --git a/esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex 
b/esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex new file mode 100644 index 0000000000..dd3a6ca7aa --- /dev/null +++ b/esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex @@ -0,0 +1,8 @@ +@misc{noaa-gml-surface-flask-ch4, + url = {https://www.esrl.noaa.gov/gmd/ccgg/flask.html}, + year = 2024, + author = {Lan, X. and J.W. Mund and A.M. Crotwell and K.W. Thoning and E. Moglia and M. Madronich and K. Baugh and G. Petron and M.J. Crotwell and D. Neff and S. Wolter and T. Mefford and S. DeVogel}, + title = {Atmospheric Methane Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative Global Air Sampling Network, 1983-2023}, + doi = {10.15138/VNCZ-M766}, + howpublished = {via website https://www.esrl.noaa.gov/gmd/ccgg/flask.html, provided by the NOAA Global Monitoring Laboratory, Earth System Research Laboratories.} +} \ No newline at end of file diff --git a/esmvaltool/references/noaa-gml-surface-flask-co2.bibtex b/esmvaltool/references/noaa-gml-surface-flask-co2.bibtex new file mode 100644 index 0000000000..4bc0213dd4 --- /dev/null +++ b/esmvaltool/references/noaa-gml-surface-flask-co2.bibtex @@ -0,0 +1,8 @@ +@misc{noaa-gml-surface-flask-co2, + url = {https://www.esrl.noaa.gov/gmd/ccgg/flask.html}, + year = 2024, + author = {Lan, X. and J.W. Mund and A.M. Crotwell and K.W. Thoning and E. Moglia and M. Madronich and K. Baugh and G. Petron and M.J. Crotwell and D. Neff and S. Wolter and T. Mefford and S.
DeVogel}, + title = {Atmospheric Carbon Dioxide Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative Global Air Sampling Network, 1968-2023}, + doi = {10.15138/wkgj-f215}, + howpublished = {via website https://www.esrl.noaa.gov/gmd/ccgg/flask.html, provided by the NOAA Global Monitoring Laboratory, Earth System Research Laboratories.} +} \ No newline at end of file diff --git a/esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex b/esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex new file mode 100644 index 0000000000..08fa05045a --- /dev/null +++ b/esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex @@ -0,0 +1,8 @@ +@misc{noaa-gml-surface-flask-n2o, + url = {https://www.esrl.noaa.gov/gmd/ccgg/flask.html}, + year = 2024, + author = {Lan, X. and J.W. Mund and A.M. Crotwell and K.W. Thoning and E. Moglia and M. Madronich and K. Baugh and G. Petron and M.J. Crotwell and D. Neff and S. Wolter and T. Mefford and S. DeVogel}, + title = {Atmospheric Nitrous Oxide Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative Global Air Sampling Network, 1997-2023}, + doi = {10.15138/53g1-x417}, + howpublished = {via website https://www.esrl.noaa.gov/gmd/ccgg/flask.html, provided by the NOAA Global Monitoring Laboratory, Earth System Research Laboratories.} +} \ No newline at end of file From 541b4c3fb025f3d98438fbfccc109aa9c9f39492 Mon Sep 17 00:00:00 2001 From: Julien Lenhardt Date: Thu, 19 Dec 2024 16:46:11 +0100 Subject: [PATCH 2/3] Clean-up of commented code --- .../datasets/noaa_gml_surface_flask_ch4.py | 318 ------------------ .../datasets/noaa_gml_surface_flask_co2.py | 318 ------------------ .../datasets/noaa_gml_surface_flask_n2o.py | 318 ------------------ 3 files changed, 954 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py index 7c5f2a848a..0837a06c8d 100644 ---
a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py @@ -20,321 +20,3 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) - - -''' -import os -import logging -import cf_units -import dask.array as da -import iris -import iris.coords -import iris.cube -import pandas as pd -import numpy as np -from datetime import datetime -from typing import NamedTuple -from pys2index import S2PointIndex -from fsspec.implementations.tar import TarFileSystem - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - -FLASK_COLUMNS = ['site', 'year', 'month', 'value'] -DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} - -class FlaskCH4Station(NamedTuple): - """NOAA GML surface flask CH4 station data.""" - - site_code: str - site_name: str - site_country: str - site_latitude: float - site_longitude: float - site_elevation: float - site_utc2lst: str - data_frame: pd.DataFrame - - -class FlaskCH4Stations(NamedTuple): - """NOAA GML surface flask CH4 station data.""" - - site_code: list[str] - site_name: list[str] - site_country: list[str] - site_latitude: list[float] - site_longitude: list[float] - site_elevation: list[float] - site_utc2lst: list[str] - data_frame: list[pd.DataFrame] - - -def load_file(filesystem, filepath, filelist): - """Load NOAA GML surface flask CH4 station data from the text file.""" - # Determine how many lines to skip in the header - skiprows = 0 - with filesystem.open(filepath, mode='rt') as file: - for line in file: - if line.startswith("#"): - skiprows = skiprows + 1 - # Read file as CSV - with filesystem.open(filepath, mode='rt') as file: - data_frame = pd.read_csv( - file, - delimiter=r'[\s]{1,20}', - skiprows=skiprows, - header=None, - 
names=FLASK_COLUMNS, - dtype=DTYPE_FLASK_COLUMNS, - engine='python' - ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone - # Check first if the surface-flask or shipboard-flask file exists - filepath_event_alt1 = filepath.replace('month', 'event') - filepath_event_alt2 = filepath.replace('month', 'event').replace( - 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') - filepath_event = None - if filepath_event_alt1 in filelist: - filepath_event = filepath_event_alt1 - elif filepath_event_alt2 in filelist: - filepath_event = filepath_event_alt2 - # Setup default values for additional attributes - site_code = filepath.split('/')[-1].split('_')[1].upper() - site_name = 'N/A' - site_country = 'N/A' - site_latitude = np.nan - site_longitude = np.nan - site_elevation = np.nan - site_utc2lst = 'N/A' - # Fetch attributes in event file if it exists - if filepath_event is not None: - with filesystem.open(filepath_event, mode='rt') as file: - for line in file: - # Observation site code - if line.startswith('# site_code :'): - site_code = line.strip().split(' : ')[-1] - # Site full name - if line.startswith('# site_name :'): - site_name = line.strip().split(' : ')[-1] - # Site country - if line.startswith('# site_country :'): - site_country = line.strip().split(' : ')[-1] - # Site latitude - if line.startswith('# site_latitude :'): - site_latitude = float(line.strip().split(' : ')[-1]) - # Site longitude - if line.startswith('# site_longitude :'): - site_longitude = float(line.strip().split(' : ')[-1]) - # Site elevation - if line.startswith('# site_elevation :'): - site_elevation = float(line.strip().split(' : ')[-1]) - # Site timezone - if line.startswith('# site_utc2lst :'): - site_utc2lst = line.strip().split(' : ')[-1] - # Check if site location is available otherwise return None - if np.any(np.isnan([site_latitude, site_longitude])): - return None - else: - # Datetime index - data_frame.index = pd.to_datetime( - 
data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) - # Create FlaskCH4Station object - station = FlaskCH4Station( - site_code, - site_name, - site_country, - site_latitude, - site_longitude, - site_elevation, - site_utc2lst, - data_frame - ) - return station - - -def merge_stations(stations): - """Collect and merge station data into a FlaskCH4Stations instance.""" - columns = {} - for name, dtype in ( - ("site_code", str), - ("site_name", str), - ("site_country", str), - ("site_latitude", np.float64), - ("site_longitude", np.float64), - ("site_elevation", np.float64), - ("site_utc2lst", str), - ("data_frame", object), - ): - columns[name] = np.array( - [getattr(station, name) for station in stations], - dtype=dtype, - ) - return FlaskCH4Stations(**columns) - - -def assemble_cube(stations, idx): - """Assemble Iris cube with station data. - - Parameters - ---------- - stations : FlaskCH4Stations - Station data - idx : int - Unique ids of all stations - - Returns - ------- - Iris cube - Iris cube with station data. - - Raises - ------ - ValueError - If station data has inconsistent variable names. - """ - min_time = np.array([df.index.min() for df in stations.data_frame]).min() - max_time = np.array([df.index.max() for df in stations.data_frame]).max() - date_index = pd.date_range(min_time, max_time, freq="MS") - data_frames = [df.reindex(index=date_index) for df in stations.data_frame] - all_data_columns = np.unique( - np.array([df.columns for df in data_frames], dtype=str), - axis=0, - ) - if len(all_data_columns) != 1: - raise ValueError( - "Station data frames has different sets of column names." 
- ) - - ch4 = da.stack([ - df["value"].values for df in data_frames - ], axis=-1)[..., idx] - - times = date_index.to_pydatetime() - time_points = np.array( - [datetime(year=t.year, month=t.month, day=15) for t in times]) - time_bounds_lower = times - time_bounds_upper = np.array([ - datetime(year=t.year + (t.month == 12), - month=t.month + 1 - (t.month == 12) * 12, - day=1) for t in times - ]) - time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) - time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") - time_coord = iris.coords.DimCoord( - points=time_units.date2num(time_points), - standard_name="time", - long_name="time", - var_name="time", - units=time_units, - bounds=time_units.date2num(time_bounds), - ) - index_coord = iris.coords.DimCoord( - points=da.arange(ch4.shape[1]), - standard_name=None, - long_name="Station index (arbitrary)", - var_name="station_index", - units="1", - ) - code_coord = iris.coords.AuxCoord( - points=stations.site_code[idx], - standard_name="platform_name", - long_name="NOAA GML CCGG Site Name", - var_name="site_code", - ) - elevation_coord = iris.coords.AuxCoord( - points=stations.site_elevation[idx], - standard_name="height_above_mean_sea_level", - long_name="Elevation", - var_name="elev", - units="m", - ) - latitude_coord = iris.coords.AuxCoord( - points=stations.site_latitude[idx], - standard_name="latitude", - long_name="Latitude", - var_name="lat", - units="degrees_north", - ) - longitude_coord = iris.coords.AuxCoord( - points=stations.site_longitude[idx], - standard_name="longitude", - long_name="Longitude", - var_name="lon", - units="degrees_east", - ) - cube = iris.cube.Cube( - data=da.ma.masked_array(ch4, da.isnan(ch4), fill_value=-999.999), - standard_name=( - "mole_fraction_of_methane_in_air"), - long_name="Mole Fraction of CH4", - var_name="ch4s", - units="mol mol-1", - dim_coords_and_dims=[ - (time_coord, 0), - (index_coord, 1), - ], - aux_coords_and_dims=[ - (latitude_coord, 1), - 
(longitude_coord, 1), - (elevation_coord, 1), - (code_coord, 1), - ] - ) - return cube - - -def build_cube(filesystem, paths, filelist): - """Build station data cube.""" - individual_stations = [ - load_file(filesystem, file_path, filelist) for file_path in paths - ] - individual_stations = [s for s in individual_stations if s is not None] - stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) - index = S2PointIndex(latlon_points) - cell_ids = index.get_cell_ids() - idx = np.argsort(cell_ids) - cube = assemble_cube(stations, idx) - return cube - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Cmorization func call.""" - raw_filename = cfg['filename'] - - tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") - paths = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*_month.txt") - filelist = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*.txt") - versions = np.unique( - np.array([os.path.basename(p).split("_")[-3] for p in paths], - dtype=str)) - if len(versions) != 1: - raise ValueError( - "All station datasets in tar file must have same version." 
- ) - version = versions[0] - cube = build_cube(tar_file_system, paths, filelist) - - attrs = cfg['attributes'].copy() - attrs['version'] = version - attrs['source'] = attrs['source'] - - # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - logger.info("CMORizing variable '%s'", short_name) - - attrs['mip'] = var['mip'] - - # Fix metadata - utils.set_global_atts(cube, attrs) - - # Save variable - utils.save_variable( - cube, - short_name, - out_dir, - attrs, - unlimited_dimensions=['time'], - ) -''' \ No newline at end of file diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py index 179698d26a..51c02628e1 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py @@ -20,321 +20,3 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) - - -''' -import os -import logging -import cf_units -import dask.array as da -import iris -import iris.coords -import iris.cube -import pandas as pd -import numpy as np -from datetime import datetime -from typing import NamedTuple -from pys2index import S2PointIndex -from fsspec.implementations.tar import TarFileSystem - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - -FLASK_COLUMNS = ['site', 'year', 'month', 'value'] -DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} - -class FlaskCO2Station(NamedTuple): - """NOAA GML surface flask CO2 station data.""" - - site_code: str - site_name: str - site_country: str - site_latitude: float - site_longitude: float - site_elevation: float - site_utc2lst: str - data_frame: pd.DataFrame - - -class 
FlaskCO2Stations(NamedTuple): - """NOAA GML surface flask CO2 station data.""" - - site_code: list[str] - site_name: list[str] - site_country: list[str] - site_latitude: list[float] - site_longitude: list[float] - site_elevation: list[float] - site_utc2lst: list[str] - data_frame: list[pd.DataFrame] - - -def load_file(filesystem, filepath, filelist): - """Load NOAA GML surface flask CO2 station data from the text file.""" - # Determine how many lines to skip in the header - skiprows = 0 - with filesystem.open(filepath, mode='rt') as file: - for line in file: - if line.startswith("#"): - skiprows = skiprows + 1 - # Read file as CSV - with filesystem.open(filepath, mode='rt') as file: - data_frame = pd.read_csv( - file, - delimiter=r'[\s]{1,20}', - skiprows=skiprows, - header=None, - names=FLASK_COLUMNS, - dtype=DTYPE_FLASK_COLUMNS, - engine='python' - ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone - # Check first if the surface-flask or shipboard-flask file exists - filepath_event_alt1 = filepath.replace('month', 'event') - filepath_event_alt2 = filepath.replace('month', 'event').replace( - 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') - filepath_event = None - if filepath_event_alt1 in filelist: - filepath_event = filepath_event_alt1 - elif filepath_event_alt2 in filelist: - filepath_event = filepath_event_alt2 - # Setup default values for additional attributes - site_code = filepath.split('/')[-1].split('_')[1].upper() - site_name = 'N/A' - site_country = 'N/A' - site_latitude = np.nan - site_longitude = np.nan - site_elevation = np.nan - site_utc2lst = 'N/A' - # Fetch attributes in event file if it exists - if filepath_event is not None: - with filesystem.open(filepath_event, mode='rt') as file: - for line in file: - # Observation site code - if line.startswith('# site_code :'): - site_code = line.strip().split(' : ')[-1] - # Site full name - if line.startswith('# site_name :'): - site_name = 
line.strip().split(' : ')[-1] - # Site country - if line.startswith('# site_country :'): - site_country = line.strip().split(' : ')[-1] - # Site latitude - if line.startswith('# site_latitude :'): - site_latitude = float(line.strip().split(' : ')[-1]) - # Site longitude - if line.startswith('# site_longitude :'): - site_longitude = float(line.strip().split(' : ')[-1]) - # Site elevation - if line.startswith('# site_elevation :'): - site_elevation = float(line.strip().split(' : ')[-1]) - # Site timezone - if line.startswith('# site_utc2lst :'): - site_utc2lst = line.strip().split(' : ')[-1] - # Check if site location is available otherwise return None - if np.any(np.isnan([site_latitude, site_longitude])): - return None - else: - # Datetime index - data_frame.index = pd.to_datetime( - data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) - # Create FlaskCO2Station object - station = FlaskCO2Station( - site_code, - site_name, - site_country, - site_latitude, - site_longitude, - site_elevation, - site_utc2lst, - data_frame - ) - return station - - -def merge_stations(stations): - """Collect and merge station data into a FlaskCO2Stations instance.""" - columns = {} - for name, dtype in ( - ("site_code", str), - ("site_name", str), - ("site_country", str), - ("site_latitude", np.float64), - ("site_longitude", np.float64), - ("site_elevation", np.float64), - ("site_utc2lst", str), - ("data_frame", object), - ): - columns[name] = np.array( - [getattr(station, name) for station in stations], - dtype=dtype, - ) - return FlaskCO2Stations(**columns) - - -def assemble_cube(stations, idx): - """Assemble Iris cube with station data. - - Parameters - ---------- - stations : FlaskCO2Stations - Station data - idx : int - Unique ids of all stations - - Returns - ------- - Iris cube - Iris cube with station data. - - Raises - ------ - ValueError - If station data has inconsistent variable names. 
- """ - min_time = np.array([df.index.min() for df in stations.data_frame]).min() - max_time = np.array([df.index.max() for df in stations.data_frame]).max() - date_index = pd.date_range(min_time, max_time, freq="MS") - data_frames = [df.reindex(index=date_index) for df in stations.data_frame] - all_data_columns = np.unique( - np.array([df.columns for df in data_frames], dtype=str), - axis=0, - ) - if len(all_data_columns) != 1: - raise ValueError( - "Station data frames has different sets of column names." - ) - - co2 = da.stack([ - df["value"].values for df in data_frames - ], axis=-1)[..., idx] - - times = date_index.to_pydatetime() - time_points = np.array( - [datetime(year=t.year, month=t.month, day=15) for t in times]) - time_bounds_lower = times - time_bounds_upper = np.array([ - datetime(year=t.year + (t.month == 12), - month=t.month + 1 - (t.month == 12) * 12, - day=1) for t in times - ]) - time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) - time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") - time_coord = iris.coords.DimCoord( - points=time_units.date2num(time_points), - standard_name="time", - long_name="time", - var_name="time", - units=time_units, - bounds=time_units.date2num(time_bounds), - ) - index_coord = iris.coords.DimCoord( - points=da.arange(co2.shape[1]), - standard_name=None, - long_name="Station index (arbitrary)", - var_name="station_index", - units="1", - ) - code_coord = iris.coords.AuxCoord( - points=stations.site_code[idx], - standard_name="platform_name", - long_name="NOAA GML CCGG Site Name", - var_name="site_code", - ) - elevation_coord = iris.coords.AuxCoord( - points=stations.site_elevation[idx], - standard_name="height_above_mean_sea_level", - long_name="Elevation", - var_name="elev", - units="m", - ) - latitude_coord = iris.coords.AuxCoord( - points=stations.site_latitude[idx], - standard_name="latitude", - long_name="Latitude", - var_name="lat", - units="degrees_north", - ) - 
longitude_coord = iris.coords.AuxCoord( - points=stations.site_longitude[idx], - standard_name="longitude", - long_name="Longitude", - var_name="lon", - units="degrees_east", - ) - cube = iris.cube.Cube( - data=da.ma.masked_array(co2, da.isnan(co2), fill_value=-999.999), - standard_name=( - "mole_fraction_of_carbon_dioxide_in_air"), - long_name="Mole Fraction of CO2", - var_name="co2s", - units="micromol mol-1", - dim_coords_and_dims=[ - (time_coord, 0), - (index_coord, 1), - ], - aux_coords_and_dims=[ - (latitude_coord, 1), - (longitude_coord, 1), - (elevation_coord, 1), - (code_coord, 1), - ] - ) - return cube - - -def build_cube(filesystem, paths, filelist): - """Build station data cube.""" - individual_stations = [ - load_file(filesystem, file_path, filelist) for file_path in paths - ] - individual_stations = [s for s in individual_stations if s is not None] - stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) - index = S2PointIndex(latlon_points) - cell_ids = index.get_cell_ids() - idx = np.argsort(cell_ids) - cube = assemble_cube(stations, idx) - return cube - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Cmorization func call.""" - raw_filename = cfg['filename'] - - tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") - paths = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*_month.txt") - filelist = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*.txt") - versions = np.unique( - np.array([os.path.basename(p).split("_")[-3] for p in paths], - dtype=str)) - if len(versions) != 1: - raise ValueError( - "All station datasets in tar file must have same version." 
- ) - version = versions[0] - cube = build_cube(tar_file_system, paths, filelist) - - attrs = cfg['attributes'].copy() - attrs['version'] = version - attrs['source'] = attrs['source'] - - # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - logger.info("CMORizing variable '%s'", short_name) - - attrs['mip'] = var['mip'] - - # Fix metadata - utils.set_global_atts(cube, attrs) - - # Save variable - utils.save_variable( - cube, - short_name, - out_dir, - attrs, - unlimited_dimensions=['time'], - ) -''' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py index e849d46a7e..35c7ae7250 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py @@ -20,321 +20,3 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) - - -''' -import os -import logging -import cf_units -import dask.array as da -import iris -import iris.coords -import iris.cube -import pandas as pd -import numpy as np -from datetime import datetime -from typing import NamedTuple -from pys2index import S2PointIndex -from fsspec.implementations.tar import TarFileSystem - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - -FLASK_COLUMNS = ['site', 'year', 'month', 'value'] -DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} - -class FlaskN2OStation(NamedTuple): - """NOAA GML surface flask N2O station data.""" - - site_code: str - site_name: str - site_country: str - site_latitude: float - site_longitude: float - site_elevation: float - site_utc2lst: str - data_frame: pd.DataFrame - - -class FlaskN2OStations(NamedTuple): - """NOAA GML surface 
flask N2O station data.""" - - site_code: list[str] - site_name: list[str] - site_country: list[str] - site_latitude: list[float] - site_longitude: list[float] - site_elevation: list[float] - site_utc2lst: list[str] - data_frame: list[pd.DataFrame] - - -def load_file(filesystem, filepath, filelist): - """Load NOAA GML surface flask N2O station data from the text file.""" - # Determine how many lines to skip in the header - skiprows = 0 - with filesystem.open(filepath, mode='rt') as file: - for line in file: - if line.startswith("#"): - skiprows = skiprows + 1 - # Read file as CSV - with filesystem.open(filepath, mode='rt') as file: - data_frame = pd.read_csv( - file, - delimiter=r'[\s]{1,20}', - skiprows=skiprows, - header=None, - names=FLASK_COLUMNS, - dtype=DTYPE_FLASK_COLUMNS, - engine='python' - ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone - # Check first if the surface-flask or shipboard-flask file exists - filepath_event_alt1 = filepath.replace('month', 'event') - filepath_event_alt2 = filepath.replace('month', 'event').replace( - 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') - filepath_event = None - if filepath_event_alt1 in filelist: - filepath_event = filepath_event_alt1 - elif filepath_event_alt2 in filelist: - filepath_event = filepath_event_alt2 - # Setup default values for additional attributes - site_code = filepath.split('/')[-1].split('_')[1].upper() - site_name = 'N/A' - site_country = 'N/A' - site_latitude = np.nan - site_longitude = np.nan - site_elevation = np.nan - site_utc2lst = 'N/A' - # Fetch attributes in event file if it exists - if filepath_event is not None: - with filesystem.open(filepath_event, mode='rt') as file: - for line in file: - # Observation site code - if line.startswith('# site_code :'): - site_code = line.strip().split(' : ')[-1] - # Site full name - if line.startswith('# site_name :'): - site_name = line.strip().split(' : ')[-1] - # Site country - if 
line.startswith('# site_country :'): - site_country = line.strip().split(' : ')[-1] - # Site latitude - if line.startswith('# site_latitude :'): - site_latitude = float(line.strip().split(' : ')[-1]) - # Site longitude - if line.startswith('# site_longitude :'): - site_longitude = float(line.strip().split(' : ')[-1]) - # Site elevation - if line.startswith('# site_elevation :'): - site_elevation = float(line.strip().split(' : ')[-1]) - # Site timezone - if line.startswith('# site_utc2lst :'): - site_utc2lst = line.strip().split(' : ')[-1] - # Check if site location is available otherwise return None - if np.any(np.isnan([site_latitude, site_longitude])): - return None - else: - # Datetime index - data_frame.index = pd.to_datetime( - data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) - # Create FlaskN2OStation object - station = FlaskN2OStation( - site_code, - site_name, - site_country, - site_latitude, - site_longitude, - site_elevation, - site_utc2lst, - data_frame - ) - return station - - -def merge_stations(stations): - """Collect and merge station data into a FlaskN2OStations instance.""" - columns = {} - for name, dtype in ( - ("site_code", str), - ("site_name", str), - ("site_country", str), - ("site_latitude", np.float64), - ("site_longitude", np.float64), - ("site_elevation", np.float64), - ("site_utc2lst", str), - ("data_frame", object), - ): - columns[name] = np.array( - [getattr(station, name) for station in stations], - dtype=dtype, - ) - return FlaskN2OStations(**columns) - - -def assemble_cube(stations, idx): - """Assemble Iris cube with station data. - - Parameters - ---------- - stations : FlaskN2OStations - Station data - idx : int - Unique ids of all stations - - Returns - ------- - Iris cube - Iris cube with station data. - - Raises - ------ - ValueError - If station data has inconsistent variable names. 
- """ - min_time = np.array([df.index.min() for df in stations.data_frame]).min() - max_time = np.array([df.index.max() for df in stations.data_frame]).max() - date_index = pd.date_range(min_time, max_time, freq="MS") - data_frames = [df.reindex(index=date_index) for df in stations.data_frame] - all_data_columns = np.unique( - np.array([df.columns for df in data_frames], dtype=str), - axis=0, - ) - if len(all_data_columns) != 1: - raise ValueError( - "Station data frames has different sets of column names." - ) - - n2o = da.stack([ - df["value"].values for df in data_frames - ], axis=-1)[..., idx] - - times = date_index.to_pydatetime() - time_points = np.array( - [datetime(year=t.year, month=t.month, day=15) for t in times]) - time_bounds_lower = times - time_bounds_upper = np.array([ - datetime(year=t.year + (t.month == 12), - month=t.month + 1 - (t.month == 12) * 12, - day=1) for t in times - ]) - time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) - time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") - time_coord = iris.coords.DimCoord( - points=time_units.date2num(time_points), - standard_name="time", - long_name="time", - var_name="time", - units=time_units, - bounds=time_units.date2num(time_bounds), - ) - index_coord = iris.coords.DimCoord( - points=da.arange(n2o.shape[1]), - standard_name=None, - long_name="Station index (arbitrary)", - var_name="station_index", - units="1", - ) - code_coord = iris.coords.AuxCoord( - points=stations.site_code[idx], - standard_name="platform_name", - long_name="NOAA GML CCGG Site Name", - var_name="site_code", - ) - elevation_coord = iris.coords.AuxCoord( - points=stations.site_elevation[idx], - standard_name="height_above_mean_sea_level", - long_name="Elevation", - var_name="elev", - units="m", - ) - latitude_coord = iris.coords.AuxCoord( - points=stations.site_latitude[idx], - standard_name="latitude", - long_name="Latitude", - var_name="lat", - units="degrees_north", - ) - 
longitude_coord = iris.coords.AuxCoord( - points=stations.site_longitude[idx], - standard_name="longitude", - long_name="Longitude", - var_name="lon", - units="degrees_east", - ) - cube = iris.cube.Cube( - data=da.ma.masked_array(n2o, da.isnan(n2o), fill_value=-999.999), - standard_name=( - "mole_fraction_of_nitrous_oxide_in_air"), - long_name="Mole Fraction of CO2", - var_name="n2os", - units="mol mol-1", - dim_coords_and_dims=[ - (time_coord, 0), - (index_coord, 1), - ], - aux_coords_and_dims=[ - (latitude_coord, 1), - (longitude_coord, 1), - (elevation_coord, 1), - (code_coord, 1), - ] - ) - return cube - - -def build_cube(filesystem, paths, filelist): - """Build station data cube.""" - individual_stations = [ - load_file(filesystem, file_path, filelist) for file_path in paths - ] - individual_stations = [s for s in individual_stations if s is not None] - stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) - index = S2PointIndex(latlon_points) - cell_ids = index.get_cell_ids() - idx = np.argsort(cell_ids) - cube = assemble_cube(stations, idx) - return cube - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Cmorization func call.""" - raw_filename = cfg['filename'] - - tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") - paths = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*_month.txt") - filelist = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*.txt") - versions = np.unique( - np.array([os.path.basename(p).split("_")[-3] for p in paths], - dtype=str)) - if len(versions) != 1: - raise ValueError( - "All station datasets in tar file must have same version." 
- ) - version = versions[0] - cube = build_cube(tar_file_system, paths, filelist) - - attrs = cfg['attributes'].copy() - attrs['version'] = version - attrs['source'] = attrs['source'] - - # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - logger.info("CMORizing variable '%s'", short_name) - - attrs['mip'] = var['mip'] - - # Fix metadata - utils.set_global_atts(cube, attrs) - - # Save variable - utils.save_variable( - cube, - short_name, - out_dir, - attrs, - unlimited_dimensions=['time'], - ) -''' \ No newline at end of file From 7ec5145fdaae00bb6d4b46f1553a27e5733fbd7c Mon Sep 17 00:00:00 2001 From: jlenh Date: Thu, 19 Dec 2024 18:52:37 +0100 Subject: [PATCH 3/3] Solve circleCI code style errors --- .../datasets/noaa_gml_surface_flask_ch4.py | 4 +++- .../datasets/noaa_gml_surface_flask_co2.py | 4 +++- .../datasets/noaa_gml_surface_flask_n2o.py | 4 +++- .../datasets/noaa_gml_surface_flask.py | 24 +++++++++++++------ .../datasets/noaa_gml_surface_flask_ch4.py | 4 +++- .../datasets/noaa_gml_surface_flask_co2.py | 4 +++- .../datasets/noaa_gml_surface_flask_n2o.py | 4 +++- 7 files changed, 35 insertions(+), 13 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py index 506052a668..859b189082 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py @@ -32,7 +32,9 @@ def download_dataset(config, dataset, dataset_info, dataset_info=dataset_info, overwrite=overwrite, ) + path = "https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/" + file = "ch4_surface-flask_ccgg_text.tar.gz" downloader.download_file( - "https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz", + path + file, wget_options=[], ) diff --git 
a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py index dc90249ecf..72ba78ed54 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py @@ -32,7 +32,9 @@ def download_dataset(config, dataset, dataset_info, dataset_info=dataset_info, overwrite=overwrite, ) + path = "https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/" + file = "co2_surface-flask_ccgg_text.tar.gz" downloader.download_file( - "https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz", + path + file, wget_options=[], ) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py index 53c1565e2e..673e0e6019 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py @@ -32,7 +32,9 @@ def download_dataset(config, dataset, dataset_info, dataset_info=dataset_info, overwrite=overwrite, ) + path = "https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/" + file = "n2o_surface-flask_ccgg_text.tar.gz" downloader.download_file( - "https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz", + path + file, wget_options=[], ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py index 8cc3d735a7..a1e914278a 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py @@ -38,6 +38,7 @@ DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} TRACE_GAS_UNITS = 
{'ch4s': '1e-09', 'co2s': '1e-06', 'n2os': '1e-09'} + class FlaskStation(NamedTuple): """NOAA GML surface flask station data.""" @@ -83,7 +84,8 @@ def load_file(filesystem, filepath, filelist): dtype=DTYPE_FLASK_COLUMNS, engine='python' ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Fetch data from event file : code, full_name, country, + # latitude, longitude, elevation, timezone # Check first if the surface-flask or shipboard-flask file exists filepath_event_alt1 = filepath.replace('month', 'event') filepath_event_alt2 = filepath.replace('month', 'event').replace( @@ -132,7 +134,9 @@ def load_file(filesystem, filepath, filelist): else: # Datetime index data_frame.index = pd.to_datetime( - data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + data_frame['year'].astype(str) + + '-' + data_frame['month'].astype(str) + ) # Create FlaskCO2Station object station = FlaskStation( site_code, @@ -260,11 +264,12 @@ def assemble_cube(stations, idx, var_attrs): units="degrees_east", ) cube = iris.cube.Cube( - data=da.ma.masked_array(trace_gas, da.isnan(trace_gas), fill_value=-999.999), + data=da.ma.masked_array( + trace_gas, da.isnan(trace_gas), fill_value=-999.999), standard_name=(var_attrs['standard_name']), long_name=var_attrs['long_name'], var_name=var_attrs['raw_name'], - units=TRACE_GAS_UNITS[var_attrs['raw_name']], # var_attrs['raw_units'], + units=TRACE_GAS_UNITS[var_attrs['raw_name']], dim_coords_and_dims=[ (time_coord, 0), (index_coord, 1), @@ -284,9 +289,14 @@ def build_cube(filesystem, paths, filelist, var_attrs): individual_stations = [ load_file(filesystem, file_path, filelist) for file_path in paths ] - individual_stations = [s for s in individual_stations if s is not None] + individual_stations = [ + s for s in individual_stations if s is not None + ] stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) 
+ latlon_points = np.stack( + [stations.site_latitude, stations.site_longitude], + axis=-1 + ) index = S2PointIndex(latlon_points) cell_ids = index.get_cell_ids() idx = np.argsort(cell_ids) @@ -304,7 +314,7 @@ def cmorization_noaa_gml_surface_flask_trace_gas( f'{cfg['trace_gas']}_surface-flask_ccgg_text/{cfg['trace_gas']}_*_month.txt') filelist = tar_file_system.glob( f'{cfg['trace_gas']}_surface-flask_ccgg_text/{cfg['trace_gas']}_*.txt') - + versions = np.unique( np.array([os.path.basename(p).split("_")[-3] for p in paths], dtype=str)) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py index 0837a06c8d..36f42df1f6 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py @@ -19,4 +19,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py index 51c02628e1..2c9a6f4482 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py @@ -19,4 +19,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date + ) diff --git 
a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py index 35c7ae7250..58a13194ed 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py @@ -19,4 +19,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date + )