From 5333f6ec603c52da7dcc15aeab9cd6fa0e0e5889 Mon Sep 17 00:00:00 2001 From: Julien Lenhardt Date: Tue, 17 Dec 2024 17:33:06 +0100 Subject: [PATCH 1/3] Additional files for new NOAA GML surface flask measurements datasets --- doc/sphinx/source/input.rst | 6 + .../NOAA-GML-SURFACE-FLASK-CH4.yml | 25 ++ .../NOAA-GML-SURFACE-FLASK-CO2.yml | 25 ++ .../NOAA-GML-SURFACE-FLASK-N2O.yml | 25 ++ esmvaltool/cmorizers/data/datasets.yml | 24 ++ .../datasets/noaa_gml_surface_flask_ch4.py | 38 ++ .../datasets/noaa_gml_surface_flask_co2.py | 38 ++ .../datasets/noaa_gml_surface_flask_n2o.py | 38 ++ .../datasets/noaa_gml_surface_flask.py | 340 ++++++++++++++++++ .../datasets/noaa_gml_surface_flask_ch4.py | 340 ++++++++++++++++++ .../datasets/noaa_gml_surface_flask_co2.py | 340 ++++++++++++++++++ .../datasets/noaa_gml_surface_flask_n2o.py | 340 ++++++++++++++++++ .../recipes/examples/recipe_check_obs.yml | 27 ++ .../noaa-gml-surface-flask-ch4.bibtex | 8 + .../noaa-gml-surface-flask-co2.bibtex | 8 + .../noaa-gml-surface-flask-n2o.bibtex | 8 + 16 files changed, 1630 insertions(+) create mode 100644 esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml create mode 100644 esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml create mode 100644 esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py create mode 100644 
esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py create mode 100644 esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex create mode 100644 esmvaltool/references/noaa-gml-surface-flask-co2.bibtex create mode 100644 esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index f9bcfafc3e..c9237e0c0b 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -437,6 +437,12 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NOAA-MBL-CH4 | ch4s (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| NOAA-GML-SURFACE-FLASK-CH4 | ch4s (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| NOAA-GML-SURFACE-FLASK-CO2 | co2s (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| NOAA-GML-SURFACE-FLASK-N2O | n2os (Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NOAAGlobalTemp | tasa (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NSIDC-0116-[nh|sh] [#note4]_ | usi, vsi (day) | 3 | Python | diff --git a/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml 
b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml new file mode 100644 index 0000000000..a2da13c2ce --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CH4.yml @@ -0,0 +1,25 @@ +--- +# Filename +filename: 'ch4_surface-flask_ccgg_text.tar.gz' + +# Trace gas +trace_gas: ch4 + +# Common global attributes for Cmorizer output +attributes: + dataset_id: NOAA-GML-SURFACE-FLASK-CH4 + version: '1.0' + tier: 2 + modeling_realm: atmos + project_id: OBS6 + source: 'https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz' + reference: 'noaa-gml-surface-flask-ch4' + +# Variables to cmorize +variables: + ch4s: + mip: Amon + raw_name: ch4s + raw_units: 'mol mol-1' + standard_name: mole_fraction_of_methane_in_air + long_name: 'Mole Fraction of CH4' diff --git a/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml new file mode 100644 index 0000000000..cb95e1711f --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-CO2.yml @@ -0,0 +1,25 @@ +--- +# Filename +filename: 'co2_surface-flask_ccgg_text.tar.gz' + +# Trace gas +trace_gas: co2 + +# Common global attributes for Cmorizer output +attributes: + dataset_id: NOAA-GML-SURFACE-FLASK-CO2 + version: '1.0' + tier: 2 + modeling_realm: atmos + project_id: OBS6 + source: 'https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz' + reference: 'noaa-gml-surface-flask-co2' + +# Variables to cmorize +variables: + co2s: + mip: Amon + raw_name: co2s + raw_units: 'micromol mol-1' + standard_name: mole_fraction_of_carbon_dioxide_in_air + long_name: 'Mole Fraction of CO2' diff --git a/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml new file mode 100644 index 0000000000..beca6ea1da --- /dev/null +++ 
b/esmvaltool/cmorizers/data/cmor_config/NOAA-GML-SURFACE-FLASK-N2O.yml @@ -0,0 +1,25 @@ +--- +# Filename +filename: 'n2o_surface-flask_ccgg_text.tar.gz' + +# Trace gas +trace_gas: n2o + +# Common global attributes for Cmorizer output +attributes: + dataset_id: NOAA-GML-SURFACE-FLASK-N2O + version: '1.0' + tier: 2 + modeling_realm: atmos + project_id: OBS6 + source: 'https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz' + reference: 'noaa-gml-surface-flask-n2o' + +# Variables to cmorize +variables: + n2os: + mip: Amon + raw_name: n2os + raw_units: 'mol mol-1' + standard_name: mole_fraction_of_nitrous_oxide_in_air + long_name: 'Mole Fraction of N2O' diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 4c7c168009..42603afb4b 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1103,6 +1103,30 @@ datasets: Download the following files: ersst.v5.yyyymm.nc for years 1854 onwards + + NOAA-GML-SURFACE-FLASK-CH4: + tier: 2 + source: https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ + last_access: 2024-07-30 + info: | + Download the following archive: + ch4_surface-flask_ccgg_text.tar.gz + + NOAA-GML-SURFACE-FLASK-CO2: + tier: 2 + source: https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/ + last_access: 2024-07-30 + info: | + Download the following archive: + co2_surface-flask_ccgg_text.tar.gz + + NOAA-GML-SURFACE-FLASK-N2O: + tier: 2 + source: https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/ + last_access: 2024-07-30 + info: | + Download the following archive: + n2o_surface-flask_ccgg_text.tar.gz NOAAGlobalTemp: tier: 2 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py new file mode 100644 index 0000000000..506052a668 --- /dev/null +++ 
b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py @@ -0,0 +1,38 @@ +"""Script to download NOAA Global Monitoring Lab surface flask data +for CH4 from NOAA's archive.""" +import logging + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, + start_date, end_date, overwrite): + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + downloader.download_file( + "https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz", + wget_options=[], + ) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py new file mode 100644 index 0000000000..dc90249ecf --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py @@ -0,0 +1,38 @@ +"""Script to download NOAA Global Monitoring Lab surface flask data +for CO2 from NOAA's archive.""" +import logging + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, + start_date, end_date, overwrite): + """Download dataset. 
+ + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + downloader.download_file( + "https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz", + wget_options=[], + ) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py new file mode 100644 index 0000000000..53c1565e2e --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py @@ -0,0 +1,38 @@ +"""Script to download NOAA Global Monitoring Lab surface flask data +for N2O from NOAA's archive.""" +import logging + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, + start_date, end_date, overwrite): + """Download dataset. 
+ + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + downloader.download_file( + "https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz", + wget_options=[], + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py new file mode 100644 index 0000000000..8cc3d735a7 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask data. + +Tier + Tier 2: freely available dataset. 
+ +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download one of the following file: + https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz + https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz + https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz +""" + +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} +TRACE_GAS_UNITS = {'ch4s': '1e-09', 'co2s': '1e-06', 'n2os': '1e-09'} + +class FlaskStation(NamedTuple): + """NOAA GML surface flask station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskStations(NamedTuple): + """NOAA GML surface flask station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as 
file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check if site location is available otherwise return None + if np.any(np.isnan([site_latitude, 
site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskCO2Station object + station = FlaskStation( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskStations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskStations(**columns) + + +def assemble_cube(stations, idx, var_attrs): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskStations + Station data + idx : int + Unique ids of all stations + var_attrs : dictionnary + Contains attributes related to the trace gas + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." 
+ ) + + trace_gas = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(trace_gas.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( + points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(trace_gas, da.isnan(trace_gas), fill_value=-999.999), + standard_name=(var_attrs['standard_name']), + long_name=var_attrs['long_name'], + var_name=var_attrs['raw_name'], + units=TRACE_GAS_UNITS[var_attrs['raw_name']], # var_attrs['raw_units'], + dim_coords_and_dims=[ + 
(time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 1), + (longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist, var_attrs): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx, var_attrs) + return cube + + +def cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob( + f'{cfg["trace_gas"]}_surface-flask_ccgg_text/{cfg["trace_gas"]}_*_month.txt') + filelist = tar_file_system.glob( + f'{cfg["trace_gas"]}_surface-flask_ccgg_text/{cfg["trace_gas"]}_*.txt') + + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + "All station datasets in tar file must have same version." 
+ ) + version = versions[0] + + var_attrs = cfg['variables'][f'{cfg["trace_gas"]}s'] + cube = build_cube(tar_file_system, paths, filelist, var_attrs) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py new file mode 100644 index 0000000000..7c5f2a848a --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask CH4 data. + +Tier + Tier 2: freely available dataset. 
+ +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download the following file: + https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz +""" + +from esmvaltool.cmorizers.data.formatters.datasets.noaa_gml_surface_flask import cmorization_noaa_gml_surface_flask_trace_gas + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + + +''' +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} + +class FlaskCH4Station(NamedTuple): + """NOAA GML surface flask CH4 station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskCH4Stations(NamedTuple): + """NOAA GML surface flask CH4 station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask CH4 station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + 
skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check 
if site location is available otherwise return None + if np.any(np.isnan([site_latitude, site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskCH4Station object + station = FlaskCH4Station( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskCH4Stations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskCH4Stations(**columns) + + +def assemble_cube(stations, idx): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskCH4Stations + Station data + idx : int + Unique ids of all stations + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." 
+ ) + + ch4 = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(ch4.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( + points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(ch4, da.isnan(ch4), fill_value=-999.999), + standard_name=( + "mole_fraction_of_methane_in_air"), + long_name="Mole Fraction of CH4", + var_name="ch4s", + units="mol mol-1", + dim_coords_and_dims=[ + (time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 1), + 
(longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx) + return cube + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*_month.txt") + filelist = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*.txt") + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + "All station datasets in tar file must have same version." 
+ ) + version = versions[0] + cube = build_cube(tar_file_system, paths, filelist) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) +''' \ No newline at end of file diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py new file mode 100644 index 0000000000..179698d26a --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask CO2 data. + +Tier + Tier 2: freely available dataset. + +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download the following file: + https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz +""" + +from esmvaltool.cmorizers.data.formatters.datasets.noaa_gml_surface_flask import cmorization_noaa_gml_surface_flask_trace_gas + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + + +''' +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + 
+FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} + +class FlaskCO2Station(NamedTuple): + """NOAA GML surface flask CO2 station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskCO2Stations(NamedTuple): + """NOAA GML surface flask CO2 station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask CO2 station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = 
np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check if site location is available otherwise return None + if np.any(np.isnan([site_latitude, site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskCO2Station object + station = FlaskCO2Station( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskCO2Stations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskCO2Stations(**columns) + + +def 
assemble_cube(stations, idx): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskCO2Stations + Station data + idx : int + Unique ids of all stations + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." + ) + + co2 = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(co2.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( 
+ points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(co2, da.isnan(co2), fill_value=-999.999), + standard_name=( + "mole_fraction_of_carbon_dioxide_in_air"), + long_name="Mole Fraction of CO2", + var_name="co2s", + units="micromol mol-1", + dim_coords_and_dims=[ + (time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 1), + (longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx) + return cube + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*_month.txt") + filelist = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*.txt") + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + 
"All station datasets in tar file must have same version." + ) + version = versions[0] + cube = build_cube(tar_file_system, paths, filelist) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) +''' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py new file mode 100644 index 0000000000..e849d46a7e --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py @@ -0,0 +1,340 @@ +"""ESMValTool CMORizer for NOAA GML surface flask N2O data. + +Tier + Tier 2: freely available dataset. 
+ +Source + https://gml.noaa.gov/ + +Last access + 20240730 + +Download and processing instructions + Download the following file: + https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz +""" + +from esmvaltool.cmorizers.data.formatters.datasets.noaa_gml_surface_flask import cmorization_noaa_gml_surface_flask_trace_gas + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + + +''' +import os +import logging +import cf_units +import dask.array as da +import iris +import iris.coords +import iris.cube +import pandas as pd +import numpy as np +from datetime import datetime +from typing import NamedTuple +from pys2index import S2PointIndex +from fsspec.implementations.tar import TarFileSystem + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +FLASK_COLUMNS = ['site', 'year', 'month', 'value'] +DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} + +class FlaskN2OStation(NamedTuple): + """NOAA GML surface flask N2O station data.""" + + site_code: str + site_name: str + site_country: str + site_latitude: float + site_longitude: float + site_elevation: float + site_utc2lst: str + data_frame: pd.DataFrame + + +class FlaskN2OStations(NamedTuple): + """NOAA GML surface flask N2O station data.""" + + site_code: list[str] + site_name: list[str] + site_country: list[str] + site_latitude: list[float] + site_longitude: list[float] + site_elevation: list[float] + site_utc2lst: list[str] + data_frame: list[pd.DataFrame] + + +def load_file(filesystem, filepath, filelist): + """Load NOAA GML surface flask N2O station data from the text file.""" + # Determine how many lines to skip in the header + skiprows = 0 + with filesystem.open(filepath, mode='rt') as file: + for line in file: + if line.startswith("#"): + 
skiprows = skiprows + 1 + # Read file as CSV + with filesystem.open(filepath, mode='rt') as file: + data_frame = pd.read_csv( + file, + delimiter=r'[\s]{1,20}', + skiprows=skiprows, + header=None, + names=FLASK_COLUMNS, + dtype=DTYPE_FLASK_COLUMNS, + engine='python' + ) + # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Check first if the surface-flask or shipboard-flask file exists + filepath_event_alt1 = filepath.replace('month', 'event') + filepath_event_alt2 = filepath.replace('month', 'event').replace( + 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') + filepath_event = None + if filepath_event_alt1 in filelist: + filepath_event = filepath_event_alt1 + elif filepath_event_alt2 in filelist: + filepath_event = filepath_event_alt2 + # Setup default values for additional attributes + site_code = filepath.split('/')[-1].split('_')[1].upper() + site_name = 'N/A' + site_country = 'N/A' + site_latitude = np.nan + site_longitude = np.nan + site_elevation = np.nan + site_utc2lst = 'N/A' + # Fetch attributes in event file if it exists + if filepath_event is not None: + with filesystem.open(filepath_event, mode='rt') as file: + for line in file: + # Observation site code + if line.startswith('# site_code :'): + site_code = line.strip().split(' : ')[-1] + # Site full name + if line.startswith('# site_name :'): + site_name = line.strip().split(' : ')[-1] + # Site country + if line.startswith('# site_country :'): + site_country = line.strip().split(' : ')[-1] + # Site latitude + if line.startswith('# site_latitude :'): + site_latitude = float(line.strip().split(' : ')[-1]) + # Site longitude + if line.startswith('# site_longitude :'): + site_longitude = float(line.strip().split(' : ')[-1]) + # Site elevation + if line.startswith('# site_elevation :'): + site_elevation = float(line.strip().split(' : ')[-1]) + # Site timezone + if line.startswith('# site_utc2lst :'): + site_utc2lst = line.strip().split(' : ')[-1] + # Check 
if site location is available otherwise return None + if np.any(np.isnan([site_latitude, site_longitude])): + return None + else: + # Datetime index + data_frame.index = pd.to_datetime( + data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + # Create FlaskN2OStation object + station = FlaskN2OStation( + site_code, + site_name, + site_country, + site_latitude, + site_longitude, + site_elevation, + site_utc2lst, + data_frame + ) + return station + + +def merge_stations(stations): + """Collect and merge station data into a FlaskN2OStations instance.""" + columns = {} + for name, dtype in ( + ("site_code", str), + ("site_name", str), + ("site_country", str), + ("site_latitude", np.float64), + ("site_longitude", np.float64), + ("site_elevation", np.float64), + ("site_utc2lst", str), + ("data_frame", object), + ): + columns[name] = np.array( + [getattr(station, name) for station in stations], + dtype=dtype, + ) + return FlaskN2OStations(**columns) + + +def assemble_cube(stations, idx): + """Assemble Iris cube with station data. + + Parameters + ---------- + stations : FlaskN2OStations + Station data + idx : int + Unique ids of all stations + + Returns + ------- + Iris cube + Iris cube with station data. + + Raises + ------ + ValueError + If station data has inconsistent variable names. + """ + min_time = np.array([df.index.min() for df in stations.data_frame]).min() + max_time = np.array([df.index.max() for df in stations.data_frame]).max() + date_index = pd.date_range(min_time, max_time, freq="MS") + data_frames = [df.reindex(index=date_index) for df in stations.data_frame] + all_data_columns = np.unique( + np.array([df.columns for df in data_frames], dtype=str), + axis=0, + ) + if len(all_data_columns) != 1: + raise ValueError( + "Station data frames has different sets of column names." 
+ ) + + n2o = da.stack([ + df["value"].values for df in data_frames + ], axis=-1)[..., idx] + + times = date_index.to_pydatetime() + time_points = np.array( + [datetime(year=t.year, month=t.month, day=15) for t in times]) + time_bounds_lower = times + time_bounds_upper = np.array([ + datetime(year=t.year + (t.month == 12), + month=t.month + 1 - (t.month == 12) * 12, + day=1) for t in times + ]) + time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) + time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") + time_coord = iris.coords.DimCoord( + points=time_units.date2num(time_points), + standard_name="time", + long_name="time", + var_name="time", + units=time_units, + bounds=time_units.date2num(time_bounds), + ) + index_coord = iris.coords.DimCoord( + points=da.arange(n2o.shape[1]), + standard_name=None, + long_name="Station index (arbitrary)", + var_name="station_index", + units="1", + ) + code_coord = iris.coords.AuxCoord( + points=stations.site_code[idx], + standard_name="platform_name", + long_name="NOAA GML CCGG Site Name", + var_name="site_code", + ) + elevation_coord = iris.coords.AuxCoord( + points=stations.site_elevation[idx], + standard_name="height_above_mean_sea_level", + long_name="Elevation", + var_name="elev", + units="m", + ) + latitude_coord = iris.coords.AuxCoord( + points=stations.site_latitude[idx], + standard_name="latitude", + long_name="Latitude", + var_name="lat", + units="degrees_north", + ) + longitude_coord = iris.coords.AuxCoord( + points=stations.site_longitude[idx], + standard_name="longitude", + long_name="Longitude", + var_name="lon", + units="degrees_east", + ) + cube = iris.cube.Cube( + data=da.ma.masked_array(n2o, da.isnan(n2o), fill_value=-999.999), + standard_name=( + "mole_fraction_of_nitrous_oxide_in_air"), + long_name="Mole Fraction of CO2", + var_name="n2os", + units="mol mol-1", + dim_coords_and_dims=[ + (time_coord, 0), + (index_coord, 1), + ], + aux_coords_and_dims=[ + (latitude_coord, 
1), + (longitude_coord, 1), + (elevation_coord, 1), + (code_coord, 1), + ] + ) + return cube + + +def build_cube(filesystem, paths, filelist): + """Build station data cube.""" + individual_stations = [ + load_file(filesystem, file_path, filelist) for file_path in paths + ] + individual_stations = [s for s in individual_stations if s is not None] + stations = merge_stations(individual_stations) + latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) + index = S2PointIndex(latlon_points) + cell_ids = index.get_cell_ids() + idx = np.argsort(cell_ids) + cube = assemble_cube(stations, idx) + return cube + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + raw_filename = cfg['filename'] + + tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") + paths = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*_month.txt") + filelist = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*.txt") + versions = np.unique( + np.array([os.path.basename(p).split("_")[-3] for p in paths], + dtype=str)) + if len(versions) != 1: + raise ValueError( + "All station datasets in tar file must have same version." 
+ ) + version = versions[0] + cube = build_cube(tar_file_system, paths, filelist) + + attrs = cfg['attributes'].copy() + attrs['version'] = version + attrs['source'] = attrs['source'] + + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", short_name) + + attrs['mip'] = var['mip'] + + # Fix metadata + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time'], + ) +''' \ No newline at end of file diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 880aef831a..e03af2610e 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -754,6 +754,33 @@ diagnostics: type: reanaly, version: v3b, start_year: 1854, end_year: 2019} scripts: null + NOAA-GML-SURFACE-FLASK-CH4: + description: NOAA Global Monitoring Lab Surface flask CH4 data check + variables: + ch4s: + additional_datasets: + - {dataset: NOAA-GML-SURFACE-FLASK-CH4, project: OBS6, mip: Amon, type: atmos, version: 1.0, tier: 2, + start_year: 1983, end_year: 2023} + scripts: null + + NOAA-GML-SURFACE-FLASK-CO2: + description: NOAA Global Monitoring Lab Surface flask CO2 data check + variables: + co2s: + additional_datasets: + - {dataset: NOAA-GML-SURFACE-FLASK-CO2, project: OBS6, mip: Amon, type: atmos, version: 1.0, tier: 2, + start_year: 1968, end_year: 2023} + scripts: null + + NOAA-GML-SURFACE-FLASK-N2O: + description: NOAA Global Monitoring Lab Surface flask N2O data check + variables: + n2os: + additional_datasets: + - {dataset: NOAA-GML-SURFACE-FLASK-N2O, project: OBS6, mip: Amon, type: atmos, version: 1.0, tier: 2, + start_year: 1997, end_year: 2023} + scripts: null + NOAA-MBL-CH4: description: NOAA marine boundary layer CH4 check variables: diff --git a/esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex 
b/esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex new file mode 100644 index 0000000000..dd3a6ca7aa --- /dev/null +++ b/esmvaltool/references/noaa-gml-surface-flask-ch4.bibtex @@ -0,0 +1,8 @@ +@misc{noaa-gml-surface-flask-ch4, + url = {https://www.esrl.noaa.gov/gmd/ccgg/flask.html}, + year = 2024, + author = {Lan, X. and J.W. Mund and A.M. Crotwell and K.W. Thoning and E. Moglia and M. Madronich and K. Baugh and G. Petron and M.J. Crotwell and D. Neff and S. Wolter and T. Mefford and S. DeVogel}, + title = {Atmospheric Methane Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative Global Air Sampling Network, 1983-2023}, + doi = {10.15138/VNCZ-M766}, + howpublished = {via website https://www.esrl.noaa.gov/gmd/ccgg/flask.html, provided by the NOAA Global Monitoring Laboratory, Earth System Research Laboratories.} +} \ No newline at end of file diff --git a/esmvaltool/references/noaa-gml-surface-flask-co2.bibtex b/esmvaltool/references/noaa-gml-surface-flask-co2.bibtex new file mode 100644 index 0000000000..4bc0213dd4 --- /dev/null +++ b/esmvaltool/references/noaa-gml-surface-flask-co2.bibtex @@ -0,0 +1,8 @@ +@misc{noaa-gml-surface-flask-co2, + url = {https://www.esrl.noaa.gov/gmd/ccgg/flask.html}, + year = 2024, + author = {Lan, X. and J.W. Mund and A.M. Crotwell and K.W. Thoning and E. Moglia and M. Madronich and K. Baugh and G. Petron and M.J. Crotwell and D. Neff and S. Wolter and T. Mefford and S.
DeVogel}, + title = {Atmospheric Carbon Dioxide Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative Global Air Sampling Network, 1968-2023}, + doi = {10.15138/wkgj-f215}, + howpublished = {via website https://www.esrl.noaa.gov/gmd/ccgg/flask.html, provided by the NOAA Global Monitoring Laboratory, Earth System Research Laboratories.} +} \ No newline at end of file diff --git a/esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex b/esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex new file mode 100644 index 0000000000..08fa05045a --- /dev/null +++ b/esmvaltool/references/noaa-gml-surface-flask-n2o.bibtex @@ -0,0 +1,8 @@ +@misc{noaa-gml-surface-flask-n2o, + url = {https://www.esrl.noaa.gov/gmd/ccgg/flask.html}, + year = 2024, + author = {Lan, X. and J.W. Mund and A.M. Crotwell and K.W. Thoning and E. Moglia and M. Madronich and K. Baugh and G. Petron and M.J. Crotwell and D. Neff and S. Wolter and T. Mefford and S. DeVogel}, + title = {Atmospheric Nitrous Oxide Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative Global Air Sampling Network, 1997-2023}, + doi = {10.15138/53g1-x417}, + howpublished = {via website https://www.esrl.noaa.gov/gmd/ccgg/flask.html, provided by the NOAA Global Monitoring Laboratory, Earth System Research Laboratories.} +} \ No newline at end of file From 541b4c3fb025f3d98438fbfccc109aa9c9f39492 Mon Sep 17 00:00:00 2001 From: Julien Lenhardt Date: Thu, 19 Dec 2024 16:46:11 +0100 Subject: [PATCH 2/3] Clean-up of commented code --- .../datasets/noaa_gml_surface_flask_ch4.py | 318 ------------------ .../datasets/noaa_gml_surface_flask_co2.py | 318 ------------------ .../datasets/noaa_gml_surface_flask_n2o.py | 318 ------------------ 3 files changed, 954 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py index 7c5f2a848a..0837a06c8d 100644 ---
a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py @@ -20,321 +20,3 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) - - -''' -import os -import logging -import cf_units -import dask.array as da -import iris -import iris.coords -import iris.cube -import pandas as pd -import numpy as np -from datetime import datetime -from typing import NamedTuple -from pys2index import S2PointIndex -from fsspec.implementations.tar import TarFileSystem - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - -FLASK_COLUMNS = ['site', 'year', 'month', 'value'] -DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} - -class FlaskCH4Station(NamedTuple): - """NOAA GML surface flask CH4 station data.""" - - site_code: str - site_name: str - site_country: str - site_latitude: float - site_longitude: float - site_elevation: float - site_utc2lst: str - data_frame: pd.DataFrame - - -class FlaskCH4Stations(NamedTuple): - """NOAA GML surface flask CH4 station data.""" - - site_code: list[str] - site_name: list[str] - site_country: list[str] - site_latitude: list[float] - site_longitude: list[float] - site_elevation: list[float] - site_utc2lst: list[str] - data_frame: list[pd.DataFrame] - - -def load_file(filesystem, filepath, filelist): - """Load NOAA GML surface flask CH4 station data from the text file.""" - # Determine how many lines to skip in the header - skiprows = 0 - with filesystem.open(filepath, mode='rt') as file: - for line in file: - if line.startswith("#"): - skiprows = skiprows + 1 - # Read file as CSV - with filesystem.open(filepath, mode='rt') as file: - data_frame = pd.read_csv( - file, - delimiter=r'[\s]{1,20}', - skiprows=skiprows, - header=None, - 
names=FLASK_COLUMNS, - dtype=DTYPE_FLASK_COLUMNS, - engine='python' - ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone - # Check first if the surface-flask or shipboard-flask file exists - filepath_event_alt1 = filepath.replace('month', 'event') - filepath_event_alt2 = filepath.replace('month', 'event').replace( - 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') - filepath_event = None - if filepath_event_alt1 in filelist: - filepath_event = filepath_event_alt1 - elif filepath_event_alt2 in filelist: - filepath_event = filepath_event_alt2 - # Setup default values for additional attributes - site_code = filepath.split('/')[-1].split('_')[1].upper() - site_name = 'N/A' - site_country = 'N/A' - site_latitude = np.nan - site_longitude = np.nan - site_elevation = np.nan - site_utc2lst = 'N/A' - # Fetch attributes in event file if it exists - if filepath_event is not None: - with filesystem.open(filepath_event, mode='rt') as file: - for line in file: - # Observation site code - if line.startswith('# site_code :'): - site_code = line.strip().split(' : ')[-1] - # Site full name - if line.startswith('# site_name :'): - site_name = line.strip().split(' : ')[-1] - # Site country - if line.startswith('# site_country :'): - site_country = line.strip().split(' : ')[-1] - # Site latitude - if line.startswith('# site_latitude :'): - site_latitude = float(line.strip().split(' : ')[-1]) - # Site longitude - if line.startswith('# site_longitude :'): - site_longitude = float(line.strip().split(' : ')[-1]) - # Site elevation - if line.startswith('# site_elevation :'): - site_elevation = float(line.strip().split(' : ')[-1]) - # Site timezone - if line.startswith('# site_utc2lst :'): - site_utc2lst = line.strip().split(' : ')[-1] - # Check if site location is available otherwise return None - if np.any(np.isnan([site_latitude, site_longitude])): - return None - else: - # Datetime index - data_frame.index = pd.to_datetime( - 
data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) - # Create FlaskCH4Station object - station = FlaskCH4Station( - site_code, - site_name, - site_country, - site_latitude, - site_longitude, - site_elevation, - site_utc2lst, - data_frame - ) - return station - - -def merge_stations(stations): - """Collect and merge station data into a FlaskCH4Stations instance.""" - columns = {} - for name, dtype in ( - ("site_code", str), - ("site_name", str), - ("site_country", str), - ("site_latitude", np.float64), - ("site_longitude", np.float64), - ("site_elevation", np.float64), - ("site_utc2lst", str), - ("data_frame", object), - ): - columns[name] = np.array( - [getattr(station, name) for station in stations], - dtype=dtype, - ) - return FlaskCH4Stations(**columns) - - -def assemble_cube(stations, idx): - """Assemble Iris cube with station data. - - Parameters - ---------- - stations : FlaskCH4Stations - Station data - idx : int - Unique ids of all stations - - Returns - ------- - Iris cube - Iris cube with station data. - - Raises - ------ - ValueError - If station data has inconsistent variable names. - """ - min_time = np.array([df.index.min() for df in stations.data_frame]).min() - max_time = np.array([df.index.max() for df in stations.data_frame]).max() - date_index = pd.date_range(min_time, max_time, freq="MS") - data_frames = [df.reindex(index=date_index) for df in stations.data_frame] - all_data_columns = np.unique( - np.array([df.columns for df in data_frames], dtype=str), - axis=0, - ) - if len(all_data_columns) != 1: - raise ValueError( - "Station data frames has different sets of column names." 
- ) - - ch4 = da.stack([ - df["value"].values for df in data_frames - ], axis=-1)[..., idx] - - times = date_index.to_pydatetime() - time_points = np.array( - [datetime(year=t.year, month=t.month, day=15) for t in times]) - time_bounds_lower = times - time_bounds_upper = np.array([ - datetime(year=t.year + (t.month == 12), - month=t.month + 1 - (t.month == 12) * 12, - day=1) for t in times - ]) - time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) - time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") - time_coord = iris.coords.DimCoord( - points=time_units.date2num(time_points), - standard_name="time", - long_name="time", - var_name="time", - units=time_units, - bounds=time_units.date2num(time_bounds), - ) - index_coord = iris.coords.DimCoord( - points=da.arange(ch4.shape[1]), - standard_name=None, - long_name="Station index (arbitrary)", - var_name="station_index", - units="1", - ) - code_coord = iris.coords.AuxCoord( - points=stations.site_code[idx], - standard_name="platform_name", - long_name="NOAA GML CCGG Site Name", - var_name="site_code", - ) - elevation_coord = iris.coords.AuxCoord( - points=stations.site_elevation[idx], - standard_name="height_above_mean_sea_level", - long_name="Elevation", - var_name="elev", - units="m", - ) - latitude_coord = iris.coords.AuxCoord( - points=stations.site_latitude[idx], - standard_name="latitude", - long_name="Latitude", - var_name="lat", - units="degrees_north", - ) - longitude_coord = iris.coords.AuxCoord( - points=stations.site_longitude[idx], - standard_name="longitude", - long_name="Longitude", - var_name="lon", - units="degrees_east", - ) - cube = iris.cube.Cube( - data=da.ma.masked_array(ch4, da.isnan(ch4), fill_value=-999.999), - standard_name=( - "mole_fraction_of_methane_in_air"), - long_name="Mole Fraction of CH4", - var_name="ch4s", - units="mol mol-1", - dim_coords_and_dims=[ - (time_coord, 0), - (index_coord, 1), - ], - aux_coords_and_dims=[ - (latitude_coord, 1), - 
(longitude_coord, 1), - (elevation_coord, 1), - (code_coord, 1), - ] - ) - return cube - - -def build_cube(filesystem, paths, filelist): - """Build station data cube.""" - individual_stations = [ - load_file(filesystem, file_path, filelist) for file_path in paths - ] - individual_stations = [s for s in individual_stations if s is not None] - stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) - index = S2PointIndex(latlon_points) - cell_ids = index.get_cell_ids() - idx = np.argsort(cell_ids) - cube = assemble_cube(stations, idx) - return cube - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Cmorization func call.""" - raw_filename = cfg['filename'] - - tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") - paths = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*_month.txt") - filelist = tar_file_system.glob("ch4_surface-flask_ccgg_text/ch4_*.txt") - versions = np.unique( - np.array([os.path.basename(p).split("_")[-3] for p in paths], - dtype=str)) - if len(versions) != 1: - raise ValueError( - "All station datasets in tar file must have same version." 
- ) - version = versions[0] - cube = build_cube(tar_file_system, paths, filelist) - - attrs = cfg['attributes'].copy() - attrs['version'] = version - attrs['source'] = attrs['source'] - - # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - logger.info("CMORizing variable '%s'", short_name) - - attrs['mip'] = var['mip'] - - # Fix metadata - utils.set_global_atts(cube, attrs) - - # Save variable - utils.save_variable( - cube, - short_name, - out_dir, - attrs, - unlimited_dimensions=['time'], - ) -''' \ No newline at end of file diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py index 179698d26a..51c02628e1 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py @@ -20,321 +20,3 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) - - -''' -import os -import logging -import cf_units -import dask.array as da -import iris -import iris.coords -import iris.cube -import pandas as pd -import numpy as np -from datetime import datetime -from typing import NamedTuple -from pys2index import S2PointIndex -from fsspec.implementations.tar import TarFileSystem - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - -FLASK_COLUMNS = ['site', 'year', 'month', 'value'] -DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} - -class FlaskCO2Station(NamedTuple): - """NOAA GML surface flask CO2 station data.""" - - site_code: str - site_name: str - site_country: str - site_latitude: float - site_longitude: float - site_elevation: float - site_utc2lst: str - data_frame: pd.DataFrame - - -class 
FlaskCO2Stations(NamedTuple): - """NOAA GML surface flask CO2 station data.""" - - site_code: list[str] - site_name: list[str] - site_country: list[str] - site_latitude: list[float] - site_longitude: list[float] - site_elevation: list[float] - site_utc2lst: list[str] - data_frame: list[pd.DataFrame] - - -def load_file(filesystem, filepath, filelist): - """Load NOAA GML surface flask CO2 station data from the text file.""" - # Determine how many lines to skip in the header - skiprows = 0 - with filesystem.open(filepath, mode='rt') as file: - for line in file: - if line.startswith("#"): - skiprows = skiprows + 1 - # Read file as CSV - with filesystem.open(filepath, mode='rt') as file: - data_frame = pd.read_csv( - file, - delimiter=r'[\s]{1,20}', - skiprows=skiprows, - header=None, - names=FLASK_COLUMNS, - dtype=DTYPE_FLASK_COLUMNS, - engine='python' - ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone - # Check first if the surface-flask or shipboard-flask file exists - filepath_event_alt1 = filepath.replace('month', 'event') - filepath_event_alt2 = filepath.replace('month', 'event').replace( - 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') - filepath_event = None - if filepath_event_alt1 in filelist: - filepath_event = filepath_event_alt1 - elif filepath_event_alt2 in filelist: - filepath_event = filepath_event_alt2 - # Setup default values for additional attributes - site_code = filepath.split('/')[-1].split('_')[1].upper() - site_name = 'N/A' - site_country = 'N/A' - site_latitude = np.nan - site_longitude = np.nan - site_elevation = np.nan - site_utc2lst = 'N/A' - # Fetch attributes in event file if it exists - if filepath_event is not None: - with filesystem.open(filepath_event, mode='rt') as file: - for line in file: - # Observation site code - if line.startswith('# site_code :'): - site_code = line.strip().split(' : ')[-1] - # Site full name - if line.startswith('# site_name :'): - site_name = 
line.strip().split(' : ')[-1] - # Site country - if line.startswith('# site_country :'): - site_country = line.strip().split(' : ')[-1] - # Site latitude - if line.startswith('# site_latitude :'): - site_latitude = float(line.strip().split(' : ')[-1]) - # Site longitude - if line.startswith('# site_longitude :'): - site_longitude = float(line.strip().split(' : ')[-1]) - # Site elevation - if line.startswith('# site_elevation :'): - site_elevation = float(line.strip().split(' : ')[-1]) - # Site timezone - if line.startswith('# site_utc2lst :'): - site_utc2lst = line.strip().split(' : ')[-1] - # Check if site location is available otherwise return None - if np.any(np.isnan([site_latitude, site_longitude])): - return None - else: - # Datetime index - data_frame.index = pd.to_datetime( - data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) - # Create FlaskCO2Station object - station = FlaskCO2Station( - site_code, - site_name, - site_country, - site_latitude, - site_longitude, - site_elevation, - site_utc2lst, - data_frame - ) - return station - - -def merge_stations(stations): - """Collect and merge station data into a FlaskCO2Stations instance.""" - columns = {} - for name, dtype in ( - ("site_code", str), - ("site_name", str), - ("site_country", str), - ("site_latitude", np.float64), - ("site_longitude", np.float64), - ("site_elevation", np.float64), - ("site_utc2lst", str), - ("data_frame", object), - ): - columns[name] = np.array( - [getattr(station, name) for station in stations], - dtype=dtype, - ) - return FlaskCO2Stations(**columns) - - -def assemble_cube(stations, idx): - """Assemble Iris cube with station data. - - Parameters - ---------- - stations : FlaskCO2Stations - Station data - idx : int - Unique ids of all stations - - Returns - ------- - Iris cube - Iris cube with station data. - - Raises - ------ - ValueError - If station data has inconsistent variable names. 
- """ - min_time = np.array([df.index.min() for df in stations.data_frame]).min() - max_time = np.array([df.index.max() for df in stations.data_frame]).max() - date_index = pd.date_range(min_time, max_time, freq="MS") - data_frames = [df.reindex(index=date_index) for df in stations.data_frame] - all_data_columns = np.unique( - np.array([df.columns for df in data_frames], dtype=str), - axis=0, - ) - if len(all_data_columns) != 1: - raise ValueError( - "Station data frames has different sets of column names." - ) - - co2 = da.stack([ - df["value"].values for df in data_frames - ], axis=-1)[..., idx] - - times = date_index.to_pydatetime() - time_points = np.array( - [datetime(year=t.year, month=t.month, day=15) for t in times]) - time_bounds_lower = times - time_bounds_upper = np.array([ - datetime(year=t.year + (t.month == 12), - month=t.month + 1 - (t.month == 12) * 12, - day=1) for t in times - ]) - time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) - time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") - time_coord = iris.coords.DimCoord( - points=time_units.date2num(time_points), - standard_name="time", - long_name="time", - var_name="time", - units=time_units, - bounds=time_units.date2num(time_bounds), - ) - index_coord = iris.coords.DimCoord( - points=da.arange(co2.shape[1]), - standard_name=None, - long_name="Station index (arbitrary)", - var_name="station_index", - units="1", - ) - code_coord = iris.coords.AuxCoord( - points=stations.site_code[idx], - standard_name="platform_name", - long_name="NOAA GML CCGG Site Name", - var_name="site_code", - ) - elevation_coord = iris.coords.AuxCoord( - points=stations.site_elevation[idx], - standard_name="height_above_mean_sea_level", - long_name="Elevation", - var_name="elev", - units="m", - ) - latitude_coord = iris.coords.AuxCoord( - points=stations.site_latitude[idx], - standard_name="latitude", - long_name="Latitude", - var_name="lat", - units="degrees_north", - ) - 
longitude_coord = iris.coords.AuxCoord( - points=stations.site_longitude[idx], - standard_name="longitude", - long_name="Longitude", - var_name="lon", - units="degrees_east", - ) - cube = iris.cube.Cube( - data=da.ma.masked_array(co2, da.isnan(co2), fill_value=-999.999), - standard_name=( - "mole_fraction_of_carbon_dioxide_in_air"), - long_name="Mole Fraction of CO2", - var_name="co2s", - units="micromol mol-1", - dim_coords_and_dims=[ - (time_coord, 0), - (index_coord, 1), - ], - aux_coords_and_dims=[ - (latitude_coord, 1), - (longitude_coord, 1), - (elevation_coord, 1), - (code_coord, 1), - ] - ) - return cube - - -def build_cube(filesystem, paths, filelist): - """Build station data cube.""" - individual_stations = [ - load_file(filesystem, file_path, filelist) for file_path in paths - ] - individual_stations = [s for s in individual_stations if s is not None] - stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) - index = S2PointIndex(latlon_points) - cell_ids = index.get_cell_ids() - idx = np.argsort(cell_ids) - cube = assemble_cube(stations, idx) - return cube - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Cmorization func call.""" - raw_filename = cfg['filename'] - - tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") - paths = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*_month.txt") - filelist = tar_file_system.glob("co2_surface-flask_ccgg_text/co2_*.txt") - versions = np.unique( - np.array([os.path.basename(p).split("_")[-3] for p in paths], - dtype=str)) - if len(versions) != 1: - raise ValueError( - "All station datasets in tar file must have same version." 
- ) - version = versions[0] - cube = build_cube(tar_file_system, paths, filelist) - - attrs = cfg['attributes'].copy() - attrs['version'] = version - attrs['source'] = attrs['source'] - - # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - logger.info("CMORizing variable '%s'", short_name) - - attrs['mip'] = var['mip'] - - # Fix metadata - utils.set_global_atts(cube, attrs) - - # Save variable - utils.save_variable( - cube, - short_name, - out_dir, - attrs, - unlimited_dimensions=['time'], - ) -''' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py index e849d46a7e..35c7ae7250 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py @@ -20,321 +20,3 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) - - -''' -import os -import logging -import cf_units -import dask.array as da -import iris -import iris.coords -import iris.cube -import pandas as pd -import numpy as np -from datetime import datetime -from typing import NamedTuple -from pys2index import S2PointIndex -from fsspec.implementations.tar import TarFileSystem - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - -FLASK_COLUMNS = ['site', 'year', 'month', 'value'] -DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} - -class FlaskN2OStation(NamedTuple): - """NOAA GML surface flask N2O station data.""" - - site_code: str - site_name: str - site_country: str - site_latitude: float - site_longitude: float - site_elevation: float - site_utc2lst: str - data_frame: pd.DataFrame - - -class FlaskN2OStations(NamedTuple): - """NOAA GML surface 
flask N2O station data.""" - - site_code: list[str] - site_name: list[str] - site_country: list[str] - site_latitude: list[float] - site_longitude: list[float] - site_elevation: list[float] - site_utc2lst: list[str] - data_frame: list[pd.DataFrame] - - -def load_file(filesystem, filepath, filelist): - """Load NOAA GML surface flask N2O station data from the text file.""" - # Determine how many lines to skip in the header - skiprows = 0 - with filesystem.open(filepath, mode='rt') as file: - for line in file: - if line.startswith("#"): - skiprows = skiprows + 1 - # Read file as CSV - with filesystem.open(filepath, mode='rt') as file: - data_frame = pd.read_csv( - file, - delimiter=r'[\s]{1,20}', - skiprows=skiprows, - header=None, - names=FLASK_COLUMNS, - dtype=DTYPE_FLASK_COLUMNS, - engine='python' - ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone - # Check first if the surface-flask or shipboard-flask file exists - filepath_event_alt1 = filepath.replace('month', 'event') - filepath_event_alt2 = filepath.replace('month', 'event').replace( - 'surface-flask_1_ccgg', 'shipboard-flask_1_ccgg') - filepath_event = None - if filepath_event_alt1 in filelist: - filepath_event = filepath_event_alt1 - elif filepath_event_alt2 in filelist: - filepath_event = filepath_event_alt2 - # Setup default values for additional attributes - site_code = filepath.split('/')[-1].split('_')[1].upper() - site_name = 'N/A' - site_country = 'N/A' - site_latitude = np.nan - site_longitude = np.nan - site_elevation = np.nan - site_utc2lst = 'N/A' - # Fetch attributes in event file if it exists - if filepath_event is not None: - with filesystem.open(filepath_event, mode='rt') as file: - for line in file: - # Observation site code - if line.startswith('# site_code :'): - site_code = line.strip().split(' : ')[-1] - # Site full name - if line.startswith('# site_name :'): - site_name = line.strip().split(' : ')[-1] - # Site country - if 
line.startswith('# site_country :'): - site_country = line.strip().split(' : ')[-1] - # Site latitude - if line.startswith('# site_latitude :'): - site_latitude = float(line.strip().split(' : ')[-1]) - # Site longitude - if line.startswith('# site_longitude :'): - site_longitude = float(line.strip().split(' : ')[-1]) - # Site elevation - if line.startswith('# site_elevation :'): - site_elevation = float(line.strip().split(' : ')[-1]) - # Site timezone - if line.startswith('# site_utc2lst :'): - site_utc2lst = line.strip().split(' : ')[-1] - # Check if site location is available otherwise return None - if np.any(np.isnan([site_latitude, site_longitude])): - return None - else: - # Datetime index - data_frame.index = pd.to_datetime( - data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) - # Create FlaskN2OStation object - station = FlaskN2OStation( - site_code, - site_name, - site_country, - site_latitude, - site_longitude, - site_elevation, - site_utc2lst, - data_frame - ) - return station - - -def merge_stations(stations): - """Collect and merge station data into a FlaskN2OStations instance.""" - columns = {} - for name, dtype in ( - ("site_code", str), - ("site_name", str), - ("site_country", str), - ("site_latitude", np.float64), - ("site_longitude", np.float64), - ("site_elevation", np.float64), - ("site_utc2lst", str), - ("data_frame", object), - ): - columns[name] = np.array( - [getattr(station, name) for station in stations], - dtype=dtype, - ) - return FlaskN2OStations(**columns) - - -def assemble_cube(stations, idx): - """Assemble Iris cube with station data. - - Parameters - ---------- - stations : FlaskN2OStations - Station data - idx : int - Unique ids of all stations - - Returns - ------- - Iris cube - Iris cube with station data. - - Raises - ------ - ValueError - If station data has inconsistent variable names. 
- """ - min_time = np.array([df.index.min() for df in stations.data_frame]).min() - max_time = np.array([df.index.max() for df in stations.data_frame]).max() - date_index = pd.date_range(min_time, max_time, freq="MS") - data_frames = [df.reindex(index=date_index) for df in stations.data_frame] - all_data_columns = np.unique( - np.array([df.columns for df in data_frames], dtype=str), - axis=0, - ) - if len(all_data_columns) != 1: - raise ValueError( - "Station data frames has different sets of column names." - ) - - n2o = da.stack([ - df["value"].values for df in data_frames - ], axis=-1)[..., idx] - - times = date_index.to_pydatetime() - time_points = np.array( - [datetime(year=t.year, month=t.month, day=15) for t in times]) - time_bounds_lower = times - time_bounds_upper = np.array([ - datetime(year=t.year + (t.month == 12), - month=t.month + 1 - (t.month == 12) * 12, - day=1) for t in times - ]) - time_bounds = np.stack([time_bounds_lower, time_bounds_upper], axis=-1) - time_units = cf_units.Unit("days since 1850-01-01", calendar="standard") - time_coord = iris.coords.DimCoord( - points=time_units.date2num(time_points), - standard_name="time", - long_name="time", - var_name="time", - units=time_units, - bounds=time_units.date2num(time_bounds), - ) - index_coord = iris.coords.DimCoord( - points=da.arange(n2o.shape[1]), - standard_name=None, - long_name="Station index (arbitrary)", - var_name="station_index", - units="1", - ) - code_coord = iris.coords.AuxCoord( - points=stations.site_code[idx], - standard_name="platform_name", - long_name="NOAA GML CCGG Site Name", - var_name="site_code", - ) - elevation_coord = iris.coords.AuxCoord( - points=stations.site_elevation[idx], - standard_name="height_above_mean_sea_level", - long_name="Elevation", - var_name="elev", - units="m", - ) - latitude_coord = iris.coords.AuxCoord( - points=stations.site_latitude[idx], - standard_name="latitude", - long_name="Latitude", - var_name="lat", - units="degrees_north", - ) - 
longitude_coord = iris.coords.AuxCoord( - points=stations.site_longitude[idx], - standard_name="longitude", - long_name="Longitude", - var_name="lon", - units="degrees_east", - ) - cube = iris.cube.Cube( - data=da.ma.masked_array(n2o, da.isnan(n2o), fill_value=-999.999), - standard_name=( - "mole_fraction_of_nitrous_oxide_in_air"), - long_name="Mole Fraction of CO2", - var_name="n2os", - units="mol mol-1", - dim_coords_and_dims=[ - (time_coord, 0), - (index_coord, 1), - ], - aux_coords_and_dims=[ - (latitude_coord, 1), - (longitude_coord, 1), - (elevation_coord, 1), - (code_coord, 1), - ] - ) - return cube - - -def build_cube(filesystem, paths, filelist): - """Build station data cube.""" - individual_stations = [ - load_file(filesystem, file_path, filelist) for file_path in paths - ] - individual_stations = [s for s in individual_stations if s is not None] - stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) - index = S2PointIndex(latlon_points) - cell_ids = index.get_cell_ids() - idx = np.argsort(cell_ids) - cube = assemble_cube(stations, idx) - return cube - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Cmorization func call.""" - raw_filename = cfg['filename'] - - tar_file_system = TarFileSystem(f"{in_dir}/{raw_filename}") - paths = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*_month.txt") - filelist = tar_file_system.glob("n2o_surface-flask_ccgg_text/n2o_*.txt") - versions = np.unique( - np.array([os.path.basename(p).split("_")[-3] for p in paths], - dtype=str)) - if len(versions) != 1: - raise ValueError( - "All station datasets in tar file must have same version." 
- ) - version = versions[0] - cube = build_cube(tar_file_system, paths, filelist) - - attrs = cfg['attributes'].copy() - attrs['version'] = version - attrs['source'] = attrs['source'] - - # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - logger.info("CMORizing variable '%s'", short_name) - - attrs['mip'] = var['mip'] - - # Fix metadata - utils.set_global_atts(cube, attrs) - - # Save variable - utils.save_variable( - cube, - short_name, - out_dir, - attrs, - unlimited_dimensions=['time'], - ) -''' \ No newline at end of file From 7ec5145fdaae00bb6d4b46f1553a27e5733fbd7c Mon Sep 17 00:00:00 2001 From: jlenh Date: Thu, 19 Dec 2024 18:52:37 +0100 Subject: [PATCH 3/3] Solve circleCI code style errors --- .../datasets/noaa_gml_surface_flask_ch4.py | 4 +++- .../datasets/noaa_gml_surface_flask_co2.py | 4 +++- .../datasets/noaa_gml_surface_flask_n2o.py | 4 +++- .../datasets/noaa_gml_surface_flask.py | 24 +++++++++++++------ .../datasets/noaa_gml_surface_flask_ch4.py | 4 +++- .../datasets/noaa_gml_surface_flask_co2.py | 4 +++- .../datasets/noaa_gml_surface_flask_n2o.py | 4 +++- 7 files changed, 35 insertions(+), 13 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py index 506052a668..859b189082 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_ch4.py @@ -32,7 +32,9 @@ def download_dataset(config, dataset, dataset_info, dataset_info=dataset_info, overwrite=overwrite, ) + path = "https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/" + file = "ch4_surface-flask_ccgg_text.tar.gz" downloader.download_file( - "https://gml.noaa.gov/aftp/data/trace_gases/ch4/flask/surface/ch4_surface-flask_ccgg_text.tar.gz", + path + file, wget_options=[], ) diff --git 
a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py index dc90249ecf..72ba78ed54 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_co2.py @@ -32,7 +32,9 @@ def download_dataset(config, dataset, dataset_info, dataset_info=dataset_info, overwrite=overwrite, ) + path = "https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/" + file = "co2_surface-flask_ccgg_text.tar.gz" downloader.download_file( - "https://gml.noaa.gov/aftp/data/trace_gases/co2/flask/surface/co2_surface-flask_ccgg_text.tar.gz", + path + file, wget_options=[], ) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py index 53c1565e2e..673e0e6019 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/noaa_gml_surface_flask_n2o.py @@ -32,7 +32,9 @@ def download_dataset(config, dataset, dataset_info, dataset_info=dataset_info, overwrite=overwrite, ) + path = "https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/" + file = "n2o_surface-flask_ccgg_text.tar.gz" downloader.download_file( - "https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.tar.gz", + path + file, wget_options=[], ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py index 8cc3d735a7..a1e914278a 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask.py @@ -38,6 +38,7 @@ DTYPE_FLASK_COLUMNS = {'site': str, 'year': int, 'month': int, 'value': float} TRACE_GAS_UNITS = 
{'ch4s': '1e-09', 'co2s': '1e-06', 'n2os': '1e-09'} + class FlaskStation(NamedTuple): """NOAA GML surface flask station data.""" @@ -83,7 +84,8 @@ def load_file(filesystem, filepath, filelist): dtype=DTYPE_FLASK_COLUMNS, engine='python' ) - # Fetch data from event file : code, full_name, country, latitude, longitude, elevation, timezone + # Fetch data from event file : code, full_name, country, + # latitude, longitude, elevation, timezone # Check first if the surface-flask or shipboard-flask file exists filepath_event_alt1 = filepath.replace('month', 'event') filepath_event_alt2 = filepath.replace('month', 'event').replace( @@ -132,7 +134,9 @@ def load_file(filesystem, filepath, filelist): else: # Datetime index data_frame.index = pd.to_datetime( - data_frame['year'].astype(str) + '-' + data_frame['month'].astype(str)) + data_frame['year'].astype(str) + + '-' + data_frame['month'].astype(str) + ) # Create FlaskCO2Station object station = FlaskStation( site_code, @@ -260,11 +264,12 @@ def assemble_cube(stations, idx, var_attrs): units="degrees_east", ) cube = iris.cube.Cube( - data=da.ma.masked_array(trace_gas, da.isnan(trace_gas), fill_value=-999.999), + data=da.ma.masked_array( + trace_gas, da.isnan(trace_gas), fill_value=-999.999), standard_name=(var_attrs['standard_name']), long_name=var_attrs['long_name'], var_name=var_attrs['raw_name'], - units=TRACE_GAS_UNITS[var_attrs['raw_name']], # var_attrs['raw_units'], + units=TRACE_GAS_UNITS[var_attrs['raw_name']], dim_coords_and_dims=[ (time_coord, 0), (index_coord, 1), @@ -284,9 +289,14 @@ def build_cube(filesystem, paths, filelist, var_attrs): individual_stations = [ load_file(filesystem, file_path, filelist) for file_path in paths ] - individual_stations = [s for s in individual_stations if s is not None] + individual_stations = [ + s for s in individual_stations if s is not None + ] stations = merge_stations(individual_stations) - latlon_points = np.stack([stations.site_latitude, stations.site_longitude], axis=-1) 
+ latlon_points = np.stack( + [stations.site_latitude, stations.site_longitude], + axis=-1 + ) index = S2PointIndex(latlon_points) cell_ids = index.get_cell_ids() idx = np.argsort(cell_ids) @@ -304,7 +314,7 @@ def cmorization_noaa_gml_surface_flask_trace_gas( f'{cfg['trace_gas']}_surface-flask_ccgg_text/{cfg['trace_gas']}_*_month.txt') filelist = tar_file_system.glob( f'{cfg['trace_gas']}_surface-flask_ccgg_text/{cfg['trace_gas']}_*.txt') - + versions = np.unique( np.array([os.path.basename(p).split("_")[-3] for p in paths], dtype=str)) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py index 0837a06c8d..36f42df1f6 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_ch4.py @@ -19,4 +19,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py index 51c02628e1..2c9a6f4482 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_co2.py @@ -19,4 +19,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date + ) diff --git 
a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py index 35c7ae7250..58a13194ed 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/noaa_gml_surface_flask_n2o.py @@ -19,4 +19,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" - cmorization_noaa_gml_surface_flask_trace_gas(in_dir, out_dir, cfg, cfg_user, start_date, end_date) + cmorization_noaa_gml_surface_flask_trace_gas( + in_dir, out_dir, cfg, cfg_user, start_date, end_date + )