Skip to content

Commit

Permalink
First steps towards using xarray
Browse files Browse the repository at this point in the history
Started replacing netcdf4 calls with xarray equivalents. I think the
observational dataset read is still entirely netcdf4-based, so that needs to be
updated. Also, I want to try calling xr.open_mfdataset() once per case and then
passing datasets rather than doing the read twice (once for temporal mean, once
for spatial mean).
  • Loading branch information
mnlevy1981 committed Jul 26, 2024
1 parent e9ddda3 commit 6b94626
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 117 deletions.
94 changes: 30 additions & 64 deletions examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.cm as mcm\n",
"from netCDF4 import Dataset\n",
"from scipy.interpolate import RegularGridInterpolator\n",
"import xarray as xr\n",
"\n",
"import utils\n",
"\n",
Expand Down Expand Up @@ -108,23 +108,16 @@
{
"cell_type": "code",
"execution_count": null,
"id": "52ba1301-0c4c-481c-a57b-a9614160807b",
"id": "27373d08-084b-4c3f-8c3f-d5c8a445b2dc",
"metadata": {},
"outputs": [],
"source": [
"## CISM grid information and loading a field used for filtering\n",
"\n",
"# Reading the information we need from the glc file\n",
"nid = Dataset(case_init_file, \"r\")\n",
"x_cism = nid.variables[\"x1\"][:]\n",
"y_cism = nid.variables[\"y1\"][:]\n",
"thk_cism = np.squeeze(nid.variables[\"thk\"][0, :, :])\n",
"nid.close()\n",
"\n",
"# Defining the grid dimensions\n",
"## For the CISM grid\n",
"nx_cism = len(x_cism)\n",
"ny_cism = len(y_cism)"
"## Get grid from initial_hist stream\n",
"thk_init_da = xr.open_dataset(case_init_file).isel(time=0)[\"thk\"]\n",
"mask = thk_init_da.data[:, :] == 0\n",
"\n",
"# Shape of array is (ny, nx)\n",
"grid_dims = thk_init_da.shape"
]
},
{
Expand All @@ -135,7 +128,9 @@
"outputs": [],
"source": [
"# Constants\n",
"res = np.abs(x_cism[1] - x_cism[0]) # CISM output resolution\n",
"res = np.abs(\n",
" thk_init_da[\"x1\"].data[1] - thk_init_da[\"x1\"].data[0]\n",
") # CISM output resolution\n",
"\n",
"rhow = 1000 # water density kg/m3\n",
"kg_to_Gt = 1e-12 # Converting kg to Gt\n",
Expand All @@ -150,10 +145,10 @@
"outputs": [],
"source": [
"params = {\n",
" \"ny_cism\": ny_cism,\n",
" \"nx_cism\": nx_cism,\n",
" \"grid_dims\": grid_dims,\n",
" \"climo_nyears\": climo_nyears,\n",
" \"mm_to_Gt\": mm_to_Gt,\n",
" \"mask\": mask,\n",
"}"
]
},
Expand All @@ -169,25 +164,6 @@
"(global mean for time series, temporal mean for climatology)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "69891e0a-0cc6-48bd-a9bf-b61e19da13b4",
"metadata": {},
"outputs": [],
"source": [
"## The observed dataset\n",
"nid = Dataset(obs_file, \"r\")\n",
"x_obs = nid.variables[\"x\"][:]\n",
"y_obs = nid.variables[\"y\"][:]\n",
"smb_obs_src = np.squeeze(nid.variables[\"SMB\"][0, :, :])\n",
"nid.close()\n",
"\n",
"## For the observed grid\n",
"nx_obs = len(x_obs)\n",
"ny_obs = len(y_obs)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -213,41 +189,30 @@
"outputs": [],
"source": [
"# Interpolating the observed data onto the CISM grid\n",
"smb_obs_da = xr.open_dataset(obs_file).isel(time=0)[\"SMB\"]\n",
"\n",
"# Defining the interpolation functions\n",
"myInterpFunction_smb_obs = RegularGridInterpolator(\n",
" (x_obs, y_obs),\n",
" smb_obs_src.transpose(),\n",
" (smb_obs_da[\"x\"].data, smb_obs_da[\"y\"].data),\n",
" smb_obs_da.data.transpose(),\n",
" method=\"linear\",\n",
" bounds_error=False,\n",
" fill_value=None,\n",
")\n",
"\n",
"# Initializing the glacier ID variable\n",
"smb_obs_climo = np.zeros((ny_cism, nx_cism))\n",
"smb_obs_climo = np.zeros(grid_dims)\n",
"\n",
"# Performing the interpolation\n",
"for j in range(ny_cism):\n",
" point_y = np.zeros(nx_cism)\n",
" point_y[:] = y_cism[j]\n",
" pts = (x_cism[:], point_y[:])\n",
" smb_obs_climo[j, :] = myInterpFunction_smb_obs(pts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "288f34cf-808f-4de1-8372-276fdbcfdc46",
"metadata": {},
"outputs": [],
"source": [
"for j in range(grid_dims[0]):\n",
" point_y = np.zeros(grid_dims[1])\n",
" point_y[:] = thk_init_da[\"y1\"].data[j]\n",
" pts = (thk_init_da[\"x1\"].data[:], point_y[:])\n",
" smb_obs_climo[j, :] = myInterpFunction_smb_obs(pts)\n",
"\n",
"# Filtering out fill values\n",
"smb_obs_climo = np.where(smb_obs_climo > 1e20, 0, smb_obs_climo)\n",
"mask = thk_cism[:, :] == 0\n",
"smb_obs_climo = np.where(mask, 0, smb_obs_climo)\n",
"smb_case_climo = np.where(mask, 0, smb_case_climo)\n",
"if base_case_name:\n",
" smb_base_climo = np.where(mask, 0, smb_base_climo)"
"smb_obs_climo = np.where(mask, 0, smb_obs_climo)"
]
},
{
Expand All @@ -258,7 +223,6 @@
"outputs": [],
"source": [
"# Integrated SMB time series\n",
"params[\"mask\"] = mask\n",
"first_year, avg_smb_case_climo = utils.compute_annual_climo(\n",
" case_path, case_name, last_year, params\n",
")\n",
Expand Down Expand Up @@ -405,9 +369,11 @@
"# what comparisons make sense when base case is HIST and new case is 1850?\n",
"\n",
"\n",
"time = np.arange(first_year, last_year)\n",
"time = np.arange(first_year, last_year + 1)\n",
"if base_case_name:\n",
" base_time = np.arange(base_first_year, base_last_year) + last_year - base_last_year\n",
" base_time = (\n",
" np.arange(base_first_year, base_last_year + 1) + last_year - base_last_year\n",
" )\n",
" base_nt = len(base_time)\n",
"nt = len(time)\n",
"\n",
Expand Down Expand Up @@ -436,7 +402,7 @@
"# Plotting annual / spatial means\n",
"plt.subplot(111)\n",
"utils.plot_line(\n",
" avg_smb_case_climo[first_year::],\n",
" avg_smb_case_climo[first_year - 1 : :],\n",
" time,\n",
" line=\"-\",\n",
" color=\"blue\",\n",
Expand All @@ -453,7 +419,7 @@
")\n",
"if base_case_name:\n",
" utils.plot_line(\n",
" avg_smb_base_case_climo[base_first_year::],\n",
" avg_smb_base_case_climo[base_first_year - 1 : :],\n",
" base_time,\n",
" line=\"-\",\n",
" color=\"red\",\n",
Expand Down
96 changes: 43 additions & 53 deletions examples/nblibrary/glc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os

import numpy as np
import xarray as xr
from matplotlib import pyplot as plt
from netCDF4 import Dataset

Expand Down Expand Up @@ -56,14 +57,13 @@ def read_smb(file):
return smb_cism


def create_climo(path, case_name, last_year, params):
# Initializing a field for the climatology
climo_out = np.zeros((params["ny_cism"], params["nx_cism"]))

# Counter for available year (only needed if the number of years available is smaller
# than the number of years requested to create the climatology.
count_yr = 0
def _get_cesm_output(path, case_name, last_year, params):
# Set parameters
rhoi = 917 # ice density kg/m3
sec_in_yr = 60 * 60 * 24 * 365 # seconds in a year
smb_convert = sec_in_yr / rhoi * 1000 # converting kg m-2 s-1 ice to mm y-1 w.e.

filenames = []
for k in range(params["climo_nyears"]):

year_to_read = last_year - k
Expand All @@ -78,12 +78,44 @@ def create_climo(path, case_name, last_year, params):
)
break

climo_out = climo_out + read_smb(filename)
count_yr = count_yr + 1
filenames.append(filename)

climo_out = (
xr.open_mfdataset(filenames)["glc1Exp_Flgl_qice"].compute() * smb_convert
)
# Mask out data that is 0 in initial condition
for k in range(len(climo_out["time"])):
climo_out.data[k, :, :] = np.where(
params["mask"],
0,
climo_out.isel(time=k).data,
)
print("number of years used in climatology = ", len(climo_out["time"]))
return climo_out


def create_climo(path, case_name, last_year, params):
    """Compute the temporal-mean SMB climatology for a single case.

    Parameters
    ----------
    path : str
        Directory containing the coupler history files.
    case_name : str
        Case name used to build the history file names.
    last_year : int
        Last model year to include in the climatology.
    params : dict
        Options consumed by ``_get_cesm_output`` (expects at least
        "climo_nyears" and "mask").

    Returns
    -------
    numpy.ndarray
        The (ny, nx) time-mean of the masked SMB field returned by
        ``_get_cesm_output`` (units: mm/yr w.e., per its conversion).
    """
    climo_out = _get_cesm_output(path, case_name, last_year, params)

    # Average over the time dimension; ``.data`` strips the xarray
    # wrapper so callers receive a plain numpy array.
    return climo_out.mean("time").data


def compute_annual_climo(path, case_name, last_year, params):
    """Build an annual time series of globally integrated SMB.

    Reads up to ``params["climo_nyears"]`` years of output ending at
    ``last_year`` via ``_get_cesm_output`` and fills the trailing entries
    of a length-``last_year`` array with the rounded global integral of
    each yearly slice.

    Returns
    -------
    tuple(int, numpy.ndarray)
        ``(first_year, avg_smb_timeseries)`` where ``first_year`` is the
        earliest model year actually available.
    """
    climo_out = _get_cesm_output(path, case_name, last_year, params)
    nyears = len(climo_out["time"])

    avg_smb_timeseries = np.zeros(last_year)
    # Slices fill indices offset..last_year-1 so the final time slice
    # lands at index (last_year - 1).
    offset = last_year - nyears
    for k in range(nyears):
        # mm_to_Gt already carries a 1 / res**2 factor, so a plain sum
        # (not a mean) gives the area integral.
        yearly_total = climo_out.isel(time=k).sum() * params["mm_to_Gt"]
        avg_smb_timeseries[offset + k] = np.round(yearly_total, 2).data

    return offset + 1, avg_smb_timeseries


def plot_contour(data, fig, ax, title, vmin, vmax, cmap, mm_to_Gt):
Expand Down Expand Up @@ -116,48 +148,6 @@ def plot_contour_diff(data_new, data_old, fig, ax, title, vmin, vmax, cmap, mm_t
cbar.ax.tick_params(labelsize=16)


def compute_annual_climo(path, case_name, last_year, params):
    """Build an annual time series of globally integrated SMB from coupler files.

    Walks backwards from ``last_year``, reading one coupler history file per
    year with ``read_smb`` until either a file is missing or
    ``params["climo_nyears"]`` years have been accumulated.

    Parameters
    ----------
    path : str
        Directory containing the coupler history files.
    case_name : str
        Case name used to build the ``*.cpl.hx.1yr2glc.*`` file names.
    last_year : int
        Last model year to include (also sizes the output array).
    params : dict
        Expects "mask" (True where SMB is zeroed out), "mm_to_Gt"
        (unit-conversion factor), and "climo_nyears" (max years to use).

    Returns
    -------
    tuple(int, numpy.ndarray)
        ``(first_year, avg_smb_timeseries)`` — the earliest year read and a
        length-``last_year`` array indexed by ``year - 1`` (unread years
        stay 0.0).
    """
    # Initializing a field for the climatology
    avg_smb_timeseries = np.zeros(last_year)

    # Counter for available years (only needed if the number of years available
    # is smaller than the number of years requested for the climatology).
    count_yr = 0

    for k in range(last_year):

        # Read newest-first: last_year, last_year - 1, ...
        year_to_read = last_year - k
        file_name = (
            f"{path}/{case_name}.cpl.hx.1yr2glc.{year_to_read:04d}-01-01-00000.nc"
        )

        # A missing file ends the scan; earlier years are assumed unavailable.
        if not os.path.isfile(file_name):
            print("The couple file for time", year_to_read, "does not exist.")
            print(
                "We will only use the files that existed until now to create the time series.",
            )
            break

        smb_temp = read_smb(file_name)
        # Zero out cells flagged by the mask before integrating.
        smb_temp = np.where(params["mask"], 0, smb_temp)

        # NOTE(review): net_avrg appears to produce the global integral here,
        # since mm_to_Gt converts to Gt — confirm against its definition.
        avg_smb_timeseries[year_to_read - 1] = np.round(
            net_avrg(smb_temp) * params["mm_to_Gt"],
            2,
        )
        count_yr = count_yr + 1

        # Stop once the requested number of climatology years is reached.
        if count_yr == params["climo_nyears"]:
            break

        del smb_temp

    # year_to_read holds the last (earliest) year visited, whether the loop
    # ended via break or exhaustion.
    first_year = year_to_read

    print("number of years used in climatology = ", count_yr)
    return first_year, avg_smb_timeseries


def plot_line(data, time, line, color, label, linewidth):
plt.plot(
time,
Expand Down

0 comments on commit 6b94626

Please sign in to comment.