From a0eeae829301d442500c5f1f954ce7ccebb6d766 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Tue, 13 Aug 2024 09:43:44 -0600
Subject: [PATCH 1/6] Some clean-up for key_metrics

1. Standardize the way dates are passed (glacier notebook pulls year out of
full date string to match atmosphere)
2. Add option for baseline comparison to atmosphere NMSE
3. Update config.yml to use run 104 (keeping 092 as baseline)
4. Clean up time series generation (allow empty list to specify "don't convert
any files to time series for this component")
---
 cupid/timeseries.py                           |   5 +
 examples/key_metrics/config.yml               |  37 +--
 examples/nblibrary/atm/nmse_PSL.ipynb         | 235 ++++++++++++------
 .../nblibrary/glc/LIWG_SMB_diagnostic.ipynb   |  38 ++-
 examples/nblibrary/glc/utils.py               |   8 +-
 5 files changed, 217 insertions(+), 106 deletions(-)

diff --git a/cupid/timeseries.py b/cupid/timeseries.py
index c1748ef..09ed6af 100644
--- a/cupid/timeseries.py
+++ b/cupid/timeseries.py
@@ -79,6 +79,11 @@ def create_time_series(
 
     """
 
+    # Don't do anything if list of requested diagnostics is empty
+    if not diag_var_list:
+        print(f"\n  No time series files requested for {component}...")
+        return
+
     # Notify user that script has started:
     print(f"\n  Generating {component} time series files...")
 
diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml
index 0bd44e2..dcc47e8 100644
--- a/examples/key_metrics/config.yml
+++ b/examples/key_metrics/config.yml
@@ -42,8 +42,12 @@ computation_config:
 # All parameters under global_params get passed to all the notebooks
 
 global_params:
-  case_name: 'b.e23_alpha17f.BLT1850.ne30_t232.092'
+  case_name: 'b.e30_beta02.BLT1850.ne30_t232.104'
+  base_case_name: 'b.e23_alpha17f.BLT1850.ne30_t232.092'
   CESM_output_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing
+  start_date: '0001-01-01'
+  end_date: '0045-01-01'
+  base_end_date: '0101-01-01'
   lc_kwargs:
     threads_per_worker: 1
 
@@ -51,46 +55,46 @@ timeseries:
   num_procs: 8
   ts_done: [False]
   overwrite_ts: [False]
-  case_name: 'b.e23_alpha17f.BLT1850.ne30_t232.092'
+  case_name: 'b.e30_beta02.BLT1850.ne30_t232.104'
 
   atm:
     vars: ['PSL']
     derive_vars: []
     hist_str: 'h0a'
     start_years: [1]
-    end_years: [100]
+    end_years: [44]
     level: 'lev'
 
   lnd:
-    vars: ['ALTMAX', 'COST_NACTIVE', 'DENIT', 'EFLX_DYNBAL']  #['process_all']
+    vars: []
     derive_vars: []
     hist_str: 'h0'
     start_years: [1]
-    end_years: [100]
+    end_years: [44]
     level: 'lev'
 
   ocn:
-    vars: ['taux', 'tauy'] # ['process_all']
+    vars: []
     derive_vars: []
-    hist_str: 'h.frc'
+    hist_str: 'h.z'
     start_years: [1]
-    end_years: [100]
+    end_years: [44]
     level: 'lev'
 
   ice:
-    vars: ['hi', 'hs', 'snowfrac', 'Tsfc'] #['process_all']
+    vars: []
     derive_vars: []
     hist_str: 'h'
     start_years: [1]
-    end_years: [100]
+    end_years: [44]
     level: 'lev'
 
   glc:
-    vars: ['usurf', 'topg'] #['process_all']
+    vars: []
     derive_vars: []
     hist_str: 'initial_hist'
     start_years: [1]
-    end_years: [100]
+    end_years: [44]
     level: 'lev'
 
 compute_notebooks:
@@ -111,10 +115,9 @@ compute_notebooks:
       nmse_PSL:
         parameter_groups:
           none:
-            regridded_output: True
+            regridded_output: False # it looks like output is already on f09 grid, didn't need to regrid time-series file
+            base_regridded_output: True
             validation_path: '/glade/campaign/cesm/development/cross-wg/diagnostic_framework/nmse_validation/fv0.9x1.25'
-            start_date: '0001-01-01'
-            end_date: '0101-01-01'
 
     glc:
       LIWG_SMB_diagnostic:
@@ -122,8 +125,8 @@ compute_notebooks:
           none:
             obs_path: '/glade/u/home/gunterl/obs_diagnostic_cesm/'
             obs_name: 'GrIS_MARv3.12_climo_1960_1999.nc'
-            climo_nyears: 40
-            last_year: 101
+            climo_nyears: 20
+            base_climo_nyears: 40
 
 #    ice:
 #      seaice:
diff --git a/examples/nblibrary/atm/nmse_PSL.ipynb b/examples/nblibrary/atm/nmse_PSL.ipynb
index 93d14c2..4bea30f 100644
--- a/examples/nblibrary/atm/nmse_PSL.ipynb
+++ b/examples/nblibrary/atm/nmse_PSL.ipynb
@@ -75,8 +75,35 @@
     "case_name = \"\"\n",
     "start_date = \"\"\n",
     "end_date = \"\"\n",
+    "base_case_output_dir = None\n",
+    "base_case_name = None\n",
+    "base_start_date = None\n",
+    "base_end_date = None\n",
     "validation_path = \"\"\n",
-    "regridded_output = False"
+    "regridded_output = False\n",
+    "base_regridded_output = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9dfe1566-abe3-4b23-a59c-113334a0458f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Want some base case parameter defaults to equal control case values\n",
+    "if base_case_name is not None:\n",
+    "    if base_case_output_dir is None:\n",
+    "        base_case_output_dir = CESM_output_dir\n",
+    "\n",
+    "    if base_start_date is None:\n",
+    "        base_start_date = start_date\n",
+    "\n",
+    "    if base_end_date is None:\n",
+    "        base_end_date = end_date\n",
+    "\n",
+    "    if base_regridded_output is None:\n",
+    "        base_regridded_output = regridded_output"
    ]
   },
   {
@@ -96,14 +123,8 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ccca8e3a-a52f-4202-9704-9d4470eda984",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
+   "id": "7f4132b5-db1f-4ae8-92df-07dd531b650e",
+   "metadata": {},
    "outputs": [],
    "source": [
     "def fix_time_dim(dat):\n",
@@ -117,9 +138,22 @@
     "    time_bounds_avg = dat[dat.time.attrs[\"bounds\"]].mean(\"nbnd\")\n",
     "    time_bounds_avg.attrs = dat.time.attrs\n",
     "    dat = dat.assign_coords({\"time\": time_bounds_avg})\n",
-    "    return xr.decode_cf(dat)\n",
-    "\n",
-    "\n",
+    "    return xr.decode_cf(dat)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ccca8e3a-a52f-4202-9704-9d4470eda984",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
     "if regridded_output:\n",
     "    file_path = f\"{CESM_output_dir}/{case_name}/atm/proc/tseries/regrid\"\n",
     "else:\n",
@@ -130,7 +164,30 @@
     "    .sel(time=slice(start_date, end_date))\n",
     "    .PSL\n",
     "    / 100.0\n",
-    ")"
+    ")\n",
+    "\n",
+    "# Ensure all datasets have the same coordinates as the output data\n",
+    "# (Avoid round-off level differences since all data should be on the same grid)\n",
+    "lon = dat.lon.data\n",
+    "lat = dat.lat.data\n",
+    "\n",
+    "if base_case_name is not None:\n",
+    "    if base_regridded_output:\n",
+    "        base_file_path = (\n",
+    "            f\"{base_case_output_dir}/{base_case_name}/atm/proc/tseries/regrid\"\n",
+    "        )\n",
+    "    else:\n",
+    "        base_file_path = f\"{base_case_output_dir}/{base_case_name}/atm/proc/tseries\"\n",
+    "\n",
+    "    base_dat = (\n",
+    "        fix_time_dim(\n",
+    "            xr.open_mfdataset(f\"{base_file_path}/*PSL*.nc\", decode_times=False)\n",
+    "        )\n",
+    "        .sel(time=slice(base_start_date, base_end_date))\n",
+    "        .assign_coords({\"lon\": lon, \"lat\": lat})\n",
+    "        .PSL\n",
+    "        / 100.0\n",
+    "    )"
    ]
   },
   {
@@ -147,7 +204,11 @@
    "outputs": [],
    "source": [
     "# --Compute seasonal and annual means\n",
-    "dat = seasonal_climatology_weighted(dat).load()"
+    "dat = seasonal_climatology_weighted(dat).load()\n",
+    "\n",
+    "\n",
+    "if base_case_name is not None:\n",
+    "    base_dat = seasonal_climatology_weighted(base_dat).load()"
    ]
   },
   {
@@ -164,25 +225,6 @@
     "## Read in validation data and other CMIP models for comparison (precomputed)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1ff152b1-2168-4a0d-826b-cf8d11f66ab7",
-   "metadata": {
-    "editable": true,
-    "slideshow": {
-     "slide_type": ""
-    },
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# Ensure all validation datasets have the same coordinates as the ERA5 data\n",
-    "# (Avoid round-off level differences since all data should be on the same grid)\n",
-    "lon = dat.lon.data\n",
-    "lat = dat.lat.data"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -248,13 +290,21 @@
     "nmse_dat = []\n",
     "nmse_cesm2 = []\n",
     "nmse_cmip6 = []\n",
+    "if base_case_name is not None:\n",
+    "    nmse_base_dat = []\n",
+    "else:\n",
+    "    nmse_base_dat = {key: None for key in [\"AM\", \"DJF\", \"MAM\", \"JJA\", \"SON\"]}\n",
     "for ivar in era5.data_vars:\n",
     "    nmse_dat.append(nmse(era5[ivar], dat[ivar]))\n",
     "    nmse_cesm2.append(nmse(era5[ivar], lens2[ivar]))\n",
     "    nmse_cmip6.append(nmse(era5[ivar], datcmip6[ivar]))\n",
+    "    if base_case_name is not None:\n",
+    "        nmse_base_dat.append(nmse(era5[ivar], base_dat[ivar]))\n",
     "nmse_dat = xr.merge(nmse_dat)\n",
     "nmse_cesm2 = xr.merge(nmse_cesm2)\n",
-    "nmse_cmip6 = xr.merge(nmse_cmip6)"
+    "nmse_cmip6 = xr.merge(nmse_cmip6)\n",
+    "if base_case_name is not None:\n",
+    "    nmse_base_dat = xr.merge(nmse_base_dat)"
    ]
   },
   {
@@ -284,7 +334,7 @@
    },
    "outputs": [],
    "source": [
-    "def plotnmse(fig, cmip6, cesm2, cesm3, x1, x2, y1, y2, titlestr):\n",
+    "def plotnmse(fig, cmip6, cesm2, cesm3, cesm_baseline, x1, x2, y1, y2, titlestr):\n",
     "    ax = fig.add_axes([x1, y1, x2 - x1, y2 - y1])\n",
     "\n",
     "    cmip6 = cmip6.sortby(cmip6, ascending=False)\n",
@@ -300,6 +350,8 @@
     "    )\n",
     "\n",
     "    ax.plot(cmip6.size + 1, cesm3, \"o\", color=\"blue\", label=\"THIS RUN\")\n",
+    "    if cesm_baseline is not None:\n",
+    "        ax.plot(cmip6.size + 1, cesm3, \"x\", color=\"red\", label=\"BASELINE\")\n",
     "\n",
     "    ax.fill_between(\n",
     "        np.arange(0, cmip6.size + 3, 1) - 0.5,\n",
@@ -335,22 +387,72 @@
    "source": [
     "fig = plt.figure(figsize=(16, 16))\n",
     "\n",
+    "vert_coord = 0.99\n",
+    "fig.text(\n",
+    "    0.5,\n",
+    "    vert_coord,\n",
+    "    \"THIS RUN = \" + case_name + \" \" + start_date + \" to \" + end_date,\n",
+    "    ha=\"center\",\n",
+    "    va=\"center\",\n",
+    "    fontsize=14,\n",
+    "    color=\"royalblue\",\n",
+    ")\n",
+    "vert_coord = vert_coord - 0.015\n",
+    "if base_case_name is not None:\n",
+    "    fig.text(\n",
+    "        0.5,\n",
+    "        vert_coord,\n",
+    "        \"BASELINE RUN = \"\n",
+    "        + base_case_name\n",
+    "        + \" \"\n",
+    "        + base_start_date\n",
+    "        + \" to \"\n",
+    "        + base_end_date,\n",
+    "        ha=\"center\",\n",
+    "        va=\"center\",\n",
+    "        fontsize=14,\n",
+    "        color=\"red\",\n",
+    "    )\n",
+    "    vert_coord = vert_coord - 0.015\n",
+    "\n",
+    "fig.text(\n",
+    "    0.5,\n",
+    "    vert_coord,\n",
+    "    \"Other runs = 1979-01-01 to 2023-12-31\",\n",
+    "    ha=\"center\",\n",
+    "    va=\"center\",\n",
+    "    fontsize=14,\n",
+    ")\n",
+    "vert_coord = vert_coord - 0.015\n",
+    "\n",
+    "fig.text(\n",
+    "    0.5,\n",
+    "    vert_coord,\n",
+    "    \"Validation data = ERA5 1979-01-01 to 2023-12-31\",\n",
+    "    ha=\"center\",\n",
+    "    va=\"center\",\n",
+    "    fontsize=14,\n",
+    ")\n",
+    "vert_coord = vert_coord - 0.03\n",
+    "\n",
     "ax = plotnmse(\n",
     "    fig,\n",
-    "    nmse_cmip6.AM,\n",
-    "    nmse_cesm2.AM,\n",
-    "    nmse_dat.AM,\n",
+    "    nmse_cmip6[\"AM\"],\n",
+    "    nmse_cesm2[\"AM\"],\n",
+    "    nmse_dat[\"AM\"],\n",
+    "    nmse_base_dat[\"AM\"],\n",
     "    0.3,\n",
     "    0.7,\n",
-    "    0.77,\n",
-    "    0.93,\n",
+    "    vert_coord - 0.16,\n",
+    "    vert_coord,\n",
     "    \"NMSE, SLP, AM\",\n",
     ")\n",
     "ax = plotnmse(\n",
     "    fig,\n",
-    "    nmse_cmip6.DJF,\n",
-    "    nmse_cesm2.DJF,\n",
-    "    nmse_dat.DJF,\n",
+    "    nmse_cmip6[\"DJF\"],\n",
+    "    nmse_cesm2[\"DJF\"],\n",
+    "    nmse_dat[\"DJF\"],\n",
+    "    nmse_base_dat[\"DJF\"],\n",
     "    0.05,\n",
     "    0.45,\n",
     "    0.57,\n",
@@ -359,9 +461,10 @@
     ")\n",
     "ax = plotnmse(\n",
     "    fig,\n",
-    "    nmse_cmip6.MAM,\n",
-    "    nmse_cesm2.MAM,\n",
-    "    nmse_dat.MAM,\n",
+    "    nmse_cmip6[\"MAM\"],\n",
+    "    nmse_cesm2[\"MAM\"],\n",
+    "    nmse_dat[\"MAM\"],\n",
+    "    nmse_base_dat[\"MAM\"],\n",
     "    0.55,\n",
     "    0.95,\n",
     "    0.57,\n",
@@ -370,9 +473,10 @@
     ")\n",
     "ax = plotnmse(\n",
     "    fig,\n",
-    "    nmse_cmip6.JJA,\n",
-    "    nmse_cesm2.JJA,\n",
-    "    nmse_dat.JJA,\n",
+    "    nmse_cmip6[\"JJA\"],\n",
+    "    nmse_cesm2[\"JJA\"],\n",
+    "    nmse_dat[\"JJA\"],\n",
+    "    nmse_base_dat[\"JJA\"],\n",
     "    0.05,\n",
     "    0.45,\n",
     "    0.37,\n",
@@ -381,40 +485,15 @@
     ")\n",
     "ax = plotnmse(\n",
     "    fig,\n",
-    "    nmse_cmip6.SON,\n",
-    "    nmse_cesm2.SON,\n",
-    "    nmse_dat.SON,\n",
+    "    nmse_cmip6[\"SON\"],\n",
+    "    nmse_cesm2[\"SON\"],\n",
+    "    nmse_dat[\"SON\"],\n",
+    "    nmse_base_dat[\"SON\"],\n",
     "    0.55,\n",
     "    0.95,\n",
     "    0.37,\n",
     "    0.52,\n",
     "    \"NMSE, SLP, SON\",\n",
-    ")\n",
-    "\n",
-    "fig.text(\n",
-    "    0.5,\n",
-    "    0.99,\n",
-    "    \"THIS RUN = \" + case_name + \" \" + start_date + \" to \" + end_date,\n",
-    "    ha=\"center\",\n",
-    "    va=\"center\",\n",
-    "    fontsize=14,\n",
-    "    color=\"royalblue\",\n",
-    ")\n",
-    "fig.text(\n",
-    "    0.5,\n",
-    "    0.975,\n",
-    "    \"Other runs = 1979-01-01 to 2023-12-31\",\n",
-    "    ha=\"center\",\n",
-    "    va=\"center\",\n",
-    "    fontsize=14,\n",
-    ")\n",
-    "fig.text(\n",
-    "    0.5,\n",
-    "    0.96,\n",
-    "    \"Validation data = ERA5 1979-01-01 to 2023-12-31\",\n",
-    "    ha=\"center\",\n",
-    "    va=\"center\",\n",
-    "    fontsize=14,\n",
     ")"
    ]
   }
diff --git a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
index fc23c64..7748d45 100644
--- a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
+++ b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
@@ -70,17 +70,37 @@
     "\n",
     "CESM_output_dir = \"\"\n",
     "case_name = \"\"  # case name\n",
-    "climo_nyears = 0  # number of years to compute the climatology\n",
-    "last_year = 0\n",
+    "climo_nyears = 0  # number of years to compute the climatology for main case\n",
+    "end_date = \"\"\n",
     "\n",
-    "base_case_output_dir = CESM_output_dir\n",
+    "base_case_output_dir = None\n",
     "base_case_name = None\n",
-    "base_last_year = last_year\n",
+    "base_end_date = None\n",
+    "base_climo_nyears = 0  # number of years to compute the climatology for base case\n",
     "\n",
     "obs_path = \"\"  # directory containing observed dataset\n",
     "obs_name = \"\"  # file name for observed dataset"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2cd40a4e-95b6-4ce7-9e48-694ef3b9ebac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Want some base case parameter defaults to equal control case values\n",
+    "if base_case_name is not None:\n",
+    "    if base_case_output_dir is None:\n",
+    "        base_case_output_dir = CESM_output_dir\n",
+    "\n",
+    "    if base_end_date is None:\n",
+    "        base_end_date = end_date\n",
+    "\n",
+    "    if base_climo_nyears == 0:\n",
+    "        base_climo_nyears = climo_nyears"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -88,13 +108,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "last_year = int(end_date.split(\"-\")[0])\n",
+    "base_last_year = int(base_end_date.split(\"-\")[0])\n",
+    "\n",
     "case_init_file = f\"{CESM_output_dir}/{case_name}/glc/hist/{case_name}.cism.gris.initial_hist.0001-01-01-00000.nc\"  # name of glc file output\n",
     "\n",
     "case_path = f\"{CESM_output_dir}/{case_name}/cpl/hist\"  # path to glc output\n",
     "case_file = f\"{case_path}/{case_name}.cpl.hx.1yr2glc.{last_year:04d}-01-01-00000.nc\"  # name of glc file output\n",
     "obs_file = f\"{obs_path}/{obs_name}\"  # name of observed dataset file\n",
     "\n",
-    "if base_case_name:\n",
+    "if base_case_name is not None:\n",
     "    base_case_path = (\n",
     "        f\"{base_case_output_dir}/{base_case_name}/cpl/hist\"  # path to cpl output\n",
     "    )\n",
@@ -152,7 +175,6 @@
    "source": [
     "params = {\n",
     "    \"grid_dims\": grid_dims,\n",
-    "    \"climo_nyears\": climo_nyears,\n",
     "    \"mm_to_Gt\": mm_to_Gt,\n",
     "    \"mask\": mask,\n",
     "}"
@@ -178,13 +200,13 @@
    "outputs": [],
    "source": [
     "# creating the SMB climatology for new case\n",
-    "smb_case = utils.read_cesm_smb(case_path, case_name, last_year, params)\n",
+    "smb_case = utils.read_cesm_smb(case_path, case_name, last_year, climo_nyears, params)\n",
     "smb_case_climo = smb_case.mean(\"time\")\n",
     "\n",
     "# creating the SMB climatology for base_case\n",
     "if base_case_name:\n",
     "    smb_base_case = utils.read_cesm_smb(\n",
-    "        base_case_path, base_case_name, base_last_year, params\n",
+    "        base_case_path, base_case_name, base_last_year, base_climo_nyears, params\n",
     "    )\n",
     "    smb_base_climo = smb_base_case.mean(\"time\")"
    ]
diff --git a/examples/nblibrary/glc/utils.py b/examples/nblibrary/glc/utils.py
index 2a11bc3..c6c573e 100644
--- a/examples/nblibrary/glc/utils.py
+++ b/examples/nblibrary/glc/utils.py
@@ -7,7 +7,7 @@
 from matplotlib import pyplot as plt
 
 
-def read_cesm_smb(path, case_name, last_year, params):
+def read_cesm_smb(path, case_name, last_year, climo_nyears, params):
     """
     This function reads CESM coupler history files and returns
     an xarray DataArray containing surface mass balance in units mm/y
@@ -18,7 +18,7 @@ def read_cesm_smb(path, case_name, last_year, params):
     smb_convert = sec_in_yr / rhoi * 1000  # converting kg m-2 s-1 ice to mm y-1 w.e.
 
     filenames = []
-    for k in range(params["climo_nyears"]):
+    for k in range(climo_nyears):
 
         year_to_read = last_year - k
         filename = (
@@ -26,7 +26,9 @@ def read_cesm_smb(path, case_name, last_year, params):
         )
 
         if not os.path.isfile(filename):
-            print(f"The couple file for time {year_to_read} does not exist.")
+            print(
+                f"Looked for {filename} (for time {year_to_read}) but it does not exist.",
+            )
             print(
                 "We will only use the files that existed until now to create the SMB climatology.",
             )

From 8c484ef836302fe6c5817bd2bdd937887af14f1a Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Tue, 13 Aug 2024 13:00:10 -0600
Subject: [PATCH 2/6] Clean up glacier notebook

1. Only set base_last_year if a baseline case has been provided
2. Update time series plots to account for differences between climo_nyears and
base_climo_nyears
---
 examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
index 7748d45..ed20047 100644
--- a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
+++ b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
@@ -109,7 +109,6 @@
    "outputs": [],
    "source": [
     "last_year = int(end_date.split(\"-\")[0])\n",
-    "base_last_year = int(base_end_date.split(\"-\")[0])\n",
     "\n",
     "case_init_file = f\"{CESM_output_dir}/{case_name}/glc/hist/{case_name}.cism.gris.initial_hist.0001-01-01-00000.nc\"  # name of glc file output\n",
     "\n",
@@ -118,6 +117,7 @@
     "obs_file = f\"{obs_path}/{obs_name}\"  # name of observed dataset file\n",
     "\n",
     "if base_case_name is not None:\n",
+    "    base_last_year = int(base_end_date.split(\"-\")[0])\n",
     "    base_case_path = (\n",
     "        f\"{base_case_output_dir}/{base_case_name}/cpl/hist\"  # path to cpl output\n",
     "    )\n",
@@ -412,14 +412,16 @@
     "\n",
     "\n",
     "time = np.arange(first_year, last_year + 1)\n",
+    "full_time = time\n",
     "if base_case_name:\n",
     "    base_time = (\n",
-    "        np.arange(base_first_year, base_last_year + 1) + last_year - base_last_year\n",
+    "        np.arange(base_first_year, base_last_year + 1) + first_year - base_first_year\n",
     "    )\n",
     "    base_nt = len(base_time)\n",
+    "    full_time = np.arange(time[0], max(time[-1], base_time[-1]) + 1)\n",
     "nt = len(time)\n",
     "\n",
-    "avg_smb_obs_timeseries = np.zeros(nt)\n",
+    "avg_smb_obs_timeseries = np.zeros(len(full_time))\n",
     "avg_smb_case_timeseries = np.zeros(nt)\n",
     "if base_case_name:\n",
     "    avg_smb_base_timeseries = np.zeros(base_nt)\n",
@@ -430,7 +432,7 @@
     "    avg_smb_base_timeseries[:] = np.round(smb_base_climo.sum() * mm_to_Gt, 2)\n",
     "\n",
     "\n",
-    "x_ticks = np.arange(first_year, last_year + 2, 5)\n",
+    "x_ticks = np.arange(full_time[0], full_time[-1] + 2, 5)\n",
     "tickx = x_ticks\n",
     "\n",
     "ymin = 100\n",
@@ -478,7 +480,7 @@
     "    )\n",
     "utils.plot_line(\n",
     "    avg_smb_obs_timeseries[:],\n",
-    "    time,\n",
+    "    full_time,\n",
     "    line=\"-\",\n",
     "    color=\"black\",\n",
     "    label=\"Observations (mean)\",\n",

From 8beae249f8089a27e42386d0d1b59b544346d322 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Mon, 26 Aug 2024 16:10:22 -0600
Subject: [PATCH 3/6] Case 104 has 21 more years of data

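Also fixed a bug in the NMSE plotting: the BASELINE marker was being drawn at
this run's value (cesm3) instead of the baseline's value (cesm_baseline)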
---
 examples/key_metrics/config.yml       | 14 +++++++-------
 examples/nblibrary/atm/nmse_PSL.ipynb |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml
index dcc47e8..4465246 100644
--- a/examples/key_metrics/config.yml
+++ b/examples/key_metrics/config.yml
@@ -46,7 +46,7 @@ global_params:
   base_case_name: 'b.e23_alpha17f.BLT1850.ne30_t232.092'
   CESM_output_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing
   start_date: '0001-01-01'
-  end_date: '0045-01-01'
+  end_date: '0066-01-01'
   base_end_date: '0101-01-01'
   lc_kwargs:
     threads_per_worker: 1
@@ -62,7 +62,7 @@ timeseries:
     derive_vars: []
     hist_str: 'h0a'
     start_years: [1]
-    end_years: [44]
+    end_years: [65]
     level: 'lev'
 
   lnd:
@@ -70,7 +70,7 @@ timeseries:
     derive_vars: []
     hist_str: 'h0'
     start_years: [1]
-    end_years: [44]
+    end_years: [65]
     level: 'lev'
 
   ocn:
@@ -78,7 +78,7 @@ timeseries:
     derive_vars: []
     hist_str: 'h.z'
     start_years: [1]
-    end_years: [44]
+    end_years: [65]
     level: 'lev'
 
   ice:
@@ -86,7 +86,7 @@ timeseries:
     derive_vars: []
     hist_str: 'h'
     start_years: [1]
-    end_years: [44]
+    end_years: [65]
     level: 'lev'
 
   glc:
@@ -94,7 +94,7 @@ timeseries:
     derive_vars: []
     hist_str: 'initial_hist'
     start_years: [1]
-    end_years: [44]
+    end_years: [65]
     level: 'lev'
 
 compute_notebooks:
@@ -125,7 +125,7 @@ compute_notebooks:
           none:
             obs_path: '/glade/u/home/gunterl/obs_diagnostic_cesm/'
             obs_name: 'GrIS_MARv3.12_climo_1960_1999.nc'
-            climo_nyears: 20
+            climo_nyears: 30
             base_climo_nyears: 40
 
 #    ice:
diff --git a/examples/nblibrary/atm/nmse_PSL.ipynb b/examples/nblibrary/atm/nmse_PSL.ipynb
index 4bea30f..b682e38 100644
--- a/examples/nblibrary/atm/nmse_PSL.ipynb
+++ b/examples/nblibrary/atm/nmse_PSL.ipynb
@@ -351,7 +351,7 @@
     "\n",
     "    ax.plot(cmip6.size + 1, cesm3, \"o\", color=\"blue\", label=\"THIS RUN\")\n",
     "    if cesm_baseline is not None:\n",
-    "        ax.plot(cmip6.size + 1, cesm3, \"x\", color=\"red\", label=\"BASELINE\")\n",
+    "        ax.plot(cmip6.size + 1, cesm_baseline, \"x\", color=\"red\", label=\"BASELINE\")\n",
     "\n",
     "    ax.fill_between(\n",
     "        np.arange(0, cmip6.size + 3, 1) - 0.5,\n",

From 4d4dea84a842303e4da905d3a4aa1d1ecfeb962c Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Wed, 28 Aug 2024 14:07:56 -0600
Subject: [PATCH 4/6] Use hide-input and hide-cell tags

For key_metrics examples, the "hide-input" tag tucks the python code away
behind a drop-down menu. There is one cell in the glacier notebook that uses
"hide-cell" instead of "hide-input" because its output isn't important (it
prints a message about the number of years used to compute the climatology)
---
 examples/nblibrary/atm/nmse_PSL.ipynb         |  51 +++++--
 .../nblibrary/glc/LIWG_SMB_diagnostic.ipynb   | 139 ++++++++++++++++--
 2 files changed, 165 insertions(+), 25 deletions(-)

diff --git a/examples/nblibrary/atm/nmse_PSL.ipynb b/examples/nblibrary/atm/nmse_PSL.ipynb
index b682e38..7909c8d 100644
--- a/examples/nblibrary/atm/nmse_PSL.ipynb
+++ b/examples/nblibrary/atm/nmse_PSL.ipynb
@@ -26,7 +26,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
@@ -66,7 +68,8 @@
      "slide_type": ""
     },
     "tags": [
-     "parameters"
+     "parameters",
+     "hide-input"
     ]
    },
    "outputs": [],
@@ -88,7 +91,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "9dfe1566-abe3-4b23-a59c-113334a0458f",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Want some base case parameter defaults to equal control case values\n",
@@ -124,7 +135,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "7f4132b5-db1f-4ae8-92df-07dd531b650e",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "def fix_time_dim(dat):\n",
@@ -150,7 +169,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
@@ -199,7 +220,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
@@ -234,7 +257,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
@@ -283,7 +308,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
@@ -330,7 +357,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
@@ -381,7 +410,9 @@
     "slideshow": {
      "slide_type": ""
     },
-    "tags": []
+    "tags": [
+     "hide-input"
+    ]
    },
    "outputs": [],
    "source": [
diff --git a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
index ed20047..252f234 100644
--- a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
+++ b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
@@ -24,7 +24,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "81bedf90-288c-4cfa-add5-b199ca9bcf72",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Import packages\n",
@@ -61,7 +69,8 @@
      "slide_type": ""
     },
     "tags": [
-     "parameters"
+     "parameters",
+     "hide-input"
     ]
    },
    "outputs": [],
@@ -86,7 +95,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "2cd40a4e-95b6-4ce7-9e48-694ef3b9ebac",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Want some base case parameter defaults to equal control case values\n",
@@ -105,7 +122,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "cef60ddb-9ff4-4a14-a8ea-0d5740b6c18a",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "last_year = int(end_date.split(\"-\")[0])\n",
@@ -138,7 +163,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "27373d08-084b-4c3f-8c3f-d5c8a445b2dc",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "## Get grid from initial_hist stream\n",
@@ -153,7 +186,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "05eee529-d42f-4872-8cf2-f484ca44bf3f",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Constants\n",
@@ -170,7 +211,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "44eda4c4-3c23-450d-83d8-c76204cefdc4",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "params = {\n",
@@ -183,7 +232,13 @@
   {
    "cell_type": "markdown",
    "id": "68fca423-582b-4179-8771-16250a5f1904",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
    "source": [
     "## Make datasets\n",
     "\n",
@@ -196,7 +251,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "bb81be67-98d6-4924-a90e-930d9b2caed8",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-cell"
+    ]
+   },
    "outputs": [],
    "source": [
     "# creating the SMB climatology for new case\n",
@@ -215,7 +278,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "ac96bb16-7bd8-4d7b-b00b-d315feeb1a5d",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Interpolating the observed data onto the CISM grid\n",
@@ -250,7 +321,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "a38682c9-dc87-4d7b-887d-8abbbe8a7265",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Integrated SMB time series\n",
@@ -267,7 +346,13 @@
   {
    "cell_type": "markdown",
    "id": "1641747b-4997-45ad-bf70-981ed97688dd",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
    "source": [
     "## Generate plots\n",
     "\n",
@@ -280,7 +365,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "c7973cfe-64e0-47d4-a1b6-73cd9e62fdb2",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Comparing SMB new run vs obs\n",
@@ -341,7 +434,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "176594bc-53a1-4934-8210-7aa7d62f5659",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Comparing SMB new run vs base case\n",
@@ -401,7 +502,15 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "01af1cd1-0351-452c-99e7-125546469f69",
-   "metadata": {},
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "hide-input"
+    ]
+   },
    "outputs": [],
    "source": [
     "# Plotting the SMB spatially averaged time series\n",

From a3a2c0f5343041820595fc28cf2e99be25c15713 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Wed, 28 Aug 2024 16:16:03 -0600
Subject: [PATCH 5/6] key_metrics generates time-series for both cases

Updated the timeseries block of config.yml to include both case_name and
base_case_name; this uncovered a few issues in constructing the arguments for
the call to create_time_series()
---
 cupid/run.py                    | 61 ++++++++++++++++++++++++++-------
 examples/key_metrics/config.yml | 32 ++++++++---------
 2 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/cupid/run.py b/cupid/run.py
index 0686bda..ef45915 100755
--- a/cupid/run.py
+++ b/cupid/run.py
@@ -111,19 +111,54 @@ def run(
         for component, comp_bool in component_options.items():
             if comp_bool:
 
-                # set time series output directory:
+                # set time series input and output directory:
                 # -----
+                if isinstance(timeseries_params["case_name"], list):
+                    ts_input_dirs = []
+                    for cname in timeseries_params["case_name"]:
+                        ts_input_dirs.append(global_params["CESM_output_dir"]+"/"+cname+f"/{component}/hist/")
+                else:
+                    ts_input_dirs = [
+                        global_params["CESM_output_dir"] + "/" +
+                        timeseries_params["case_name"] + f"/{component}/hist/",
+                    ]
+
                 if "ts_output_dir" in timeseries_params:
-                    ts_output_dir = os.path.join(
-                            timeseries_params["ts_output_dir"],
-                            f"{component}", "proc", "tseries",
-                    )
+                    if isinstance(timeseries_params["ts_output_dir"], list):
+                        ts_output_dirs = []  # flat list of path strings, like the other branches
+                        for ts_outdir in timeseries_params["ts_output_dir"]:
+                            ts_output_dirs.append(
+                                os.path.join(
+                                        ts_outdir,
+                                        f"{component}", "proc", "tseries",
+                                ),
+                            )
+                    else:
+                        ts_output_dirs = [
+                            os.path.join(
+                                    timeseries_params["ts_output_dir"],
+                                    f"{component}", "proc", "tseries",
+                            ),
+                        ]
                 else:
-                    ts_output_dir = os.path.join(
-                            global_params["CESM_output_dir"],
-                            timeseries_params["case_name"],
-                            f"{component}", "proc", "tseries",
-                    )
+                    if isinstance(timeseries_params["case_name"], list):
+                        ts_output_dirs = []
+                        for cname in timeseries_params["case_name"]:
+                            ts_output_dirs.append(
+                                os.path.join(
+                                        global_params["CESM_output_dir"],
+                                        cname,
+                                        f"{component}", "proc", "tseries",
+                                ),
+                            )
+                    else:
+                        ts_output_dirs = [
+                            os.path.join(
+                                    global_params["CESM_output_dir"],
+                                    timeseries_params["case_name"],
+                                    f"{component}", "proc", "tseries",
+                            ),
+                        ]
                 # -----
 
                 # fmt: off
@@ -132,10 +167,10 @@ def run(
                     component,
                     timeseries_params[component]["vars"],
                     timeseries_params[component]["derive_vars"],
-                    [timeseries_params["case_name"]],
+                    timeseries_params["case_name"],
                     timeseries_params[component]["hist_str"],
-                    [global_params["CESM_output_dir"]+"/"+timeseries_params["case_name"]+f"/{component}/hist/"],
-                    [ts_output_dir],
+                    ts_input_dirs,
+                    ts_output_dirs,
                     # Note that timeseries output will eventually go in
                     #   /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/
                     timeseries_params["ts_done"],
diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml
index 4465246..879d477 100644
--- a/examples/key_metrics/config.yml
+++ b/examples/key_metrics/config.yml
@@ -46,55 +46,54 @@ global_params:
   base_case_name: 'b.e23_alpha17f.BLT1850.ne30_t232.092'
   CESM_output_dir: /glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing
   start_date: '0001-01-01'
-  end_date: '0066-01-01'
-  base_end_date: '0101-01-01'
+  end_date: '0101-01-01'
   lc_kwargs:
     threads_per_worker: 1
 
 timeseries:
   num_procs: 8
-  ts_done: [False]
-  overwrite_ts: [False]
-  case_name: 'b.e30_beta02.BLT1850.ne30_t232.104'
+  ts_done: [False, False]
+  overwrite_ts: [False, False]
+  case_name: ['b.e30_beta02.BLT1850.ne30_t232.104', 'b.e23_alpha17f.BLT1850.ne30_t232.092']
 
   atm:
     vars: ['PSL']
     derive_vars: []
     hist_str: 'h0a'
-    start_years: [1]
-    end_years: [65]
+    start_years: [1,1]
+    end_years: [100,100]
     level: 'lev'
 
   lnd:
     vars: []
     derive_vars: []
     hist_str: 'h0'
-    start_years: [1]
-    end_years: [65]
+    start_years: [1,1]
+    end_years: [100,100]
     level: 'lev'
 
   ocn:
     vars: []
     derive_vars: []
     hist_str: 'h.z'
-    start_years: [1]
-    end_years: [65]
+    start_years: [1,1]
+    end_years: [100,100]
     level: 'lev'
 
   ice:
     vars: []
     derive_vars: []
     hist_str: 'h'
-    start_years: [1]
-    end_years: [65]
+    start_years: [1,1]
+    end_years: [100,100]
     level: 'lev'
 
   glc:
     vars: []
     derive_vars: []
     hist_str: 'initial_hist'
-    start_years: [1]
-    end_years: [65]
+    start_years: [1,1]
+    end_years: [100,100]
     level: 'lev'
 
 compute_notebooks:
@@ -125,8 +124,7 @@ compute_notebooks:
           none:
             obs_path: '/glade/u/home/gunterl/obs_diagnostic_cesm/'
             obs_name: 'GrIS_MARv3.12_climo_1960_1999.nc'
-            climo_nyears: 30
-            base_climo_nyears: 40
+            climo_nyears: 40
 
 #    ice:
 #      seaice:

From c0ab02ab624ea55d463dbfa5ef1725082b9d9fc1 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Wed, 28 Aug 2024 16:37:54 -0600
Subject: [PATCH 6/6] Update time axis for glacier plot

---
 examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
index 252f234..5065ebe 100644
--- a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
+++ b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
@@ -527,7 +527,7 @@
     "        np.arange(base_first_year, base_last_year + 1) + first_year - base_first_year\n",
     "    )\n",
     "    base_nt = len(base_time)\n",
-    "    full_time = np.arange(time[0], max(time[-1], base_time[-1]))\n",
+    "    full_time = np.arange(time[0], max(time[-1], base_time[-1]) + 1)\n",
     "nt = len(time)\n",
     "\n",
     "avg_smb_obs_timeseries = np.zeros(len(full_time))\n",