
Commit

alrightly, lets move some of this hardcoding to the top of the file, see how it goes...
ilaflott committed Oct 25, 2024
1 parent 6285283 commit 4b31338
Showing 1 changed file with 77 additions and 129 deletions.
206 changes: 77 additions & 129 deletions fre/cmor/cmor_mixer.py
@@ -14,66 +14,17 @@
import click
import cmor

# ------ \start assumptions / potential future configuration thingies.
# GLOBAL hardcoded assumption: netcdf files always have an ending of '.nc'
# many implicit assumptions regarding the presence of metadata in the input netcdf file name
# e.g. datetime, variable name, name_of_set are all assumed to be at particular spots
# utf8 encoding
# MINOR: opening netcdf files in append or write or read
# MINOR: key names in certain input configuration files- these are tightly controlled
#
# for check_dataset_for_ocean_grids:
# input reading/checking hardcode - dataset doesn't have a variable named 'xh'
# for get_vertical_dimension:
# input reading/checking hardcode - dataset has dimension/axis name 'Z'
# for create_tmp_dir:
# input reading/checking hardcode - check output directory for specific drives local2, work, net
# output moving/writing hardcode - tmpdir name is 'tmp' if condition met
#
# for rewrite_netcdf_file_var:
# input reading/checking hardcode - dimensions named 'lat', 'lon', 'time'
# input reading/checking hardcode - "bounds" for above, named 'lat_bnds', 'lon_bnds', 'time_bnds'
# input reading/checking hardcode - check that var_dim is 3 or 4
# input reading/checking hardcode - check that var_dim is 3 --> simple 3 dim subcase
# input reading/checking hardcode - if var_dim is 4, vert_dim must be one of the following:
# "plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf"
# input reading/checking hardcode - then subcases are relevant as follows:
# if vert_dim in ["plev30", "plev19", "plev8", "height2m"] --> SUBCASE
# elif vert_dim in ["level", "lev", "levhalf"] --> DISTINCT SUBCASE
# pressure input file is tagged with 'ps' potentially nearby
# sub_sub_case --> if vert_dim is lev_half
# input ds has zfactor values as 'ap_bnds', 'b_bnds'
# output moving/writing hardcode - output zfactors have names "ap_half", "b_half",
# output vertical level axis name "alternate_hybrid_sigma_half"
# sub_sub_case --> else
# input ds has zfactor values as 'ap' and 'b', and zfactor bnds as 'ap_bnds', 'b_bnds'
# output moving/writing hardcode - output zfactors have names "ap", "b",
# output vertical level axis name "alternate_hybrid_sigma"
# output moving/writing hardcode - output interpolated pressures have name "ps", units "Pa"
# output moving/writing hardcode - cmor setup parameters
# output moving/writing hardcode - lat/lon axes named "latitude"/"longitude" with units "degrees_N" "degrees_E"
#
# for cmorize_target_var_files:
# input reading/checking hardcode - pressure input file is tagged with 'ps' potentially nearby
# output moving/writing hardcode - pressure out file is named with 'ps'
#
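#
# to make the filename assumption above concrete: a sketch, with hypothetical
# names, of the dot-delimited layout assumed throughout this module:
#     <name_of_set>.<iso_datetime>.<variable>.nc
# e.g. a file 'atmos_month.196001-196412.ta.nc' would be assumed to yield
#     name_of_set = 'atmos_month', iso_datetime = '196001-196412', variable = 'ta'
# via filename.split('.'), as in get_iso_datetimes further below.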



# ----- \start consts
DEBUG_MODE_RUN_ONE = True

#
INPUT_READ_PS_FILE_VAR_NAME = 'ps'
INPUT_READDIR_NAME_CHECKS = [ [ 'contains', '/work'],
['contains', '/net'],
['equal', '/local2'] ]

#
OUTPUT_TEMP_DIR_NAME = 'tmp'

#
INPUT_READ_OCEAN_GRID_VAR_NAME = 'xh'
INPUT_READ_TAG_OCEAN_GRID_VAR = 'xh'
INPUT_READ_Z_AXIS_NAME = 'Z'

#
@@ -85,36 +36,26 @@
INPUT_READ_TIME_BNDS = 'time_bnds'

#
INPUT_ACCEPT_VAR_DIMS = [3,4]
INPUT_ACCEPT_VERT_DIMS = ["plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf"]

# ----
#INPUT_SUBCASE1_VAR_VERT_DIMS = { "4": [ "plev30", "plev19", "plev8", "height2m" ] }

# ----
INPUT_SUBCASE2_VAR_VERT_DIMS = { "4": [ "level", "lev", "levhalf" ] }
#-
OUTPUT_SUBCASE2_PS_VAR_NAME = 'ps'
OUTPUT_SUBCASE2_PS_VAR_UNIT = 'Pa'

# --- ---
INPUT_SUBCASE2_0_VAR_VERT_DIMS = { "4": [ "levhalf" ] }
INPUT_SUBCASE2_0_ZFACT_VALS = ['ap_bnds','b_bnds']
#-
OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap_half','b_half']
OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma_half'

# --- ---
INPUT_SUBCASE2_1_VAR_VERT_DIMS = { "4": [ "level", "lev" ] }
INPUT_SUBCASE2_1_ZFACT_VALS = ['ap','b']
INPUT_SUBCASE2_1_ZFACT_BNDS = ['ap_bnds','b_bnds']
#-
OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap','b']
OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma'
INPUT_READ_AP_ZFACTS = ['ap','ap_bnds']
INPUT_READ_B_ZFACTS = ['b','b_bnds']

#
OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma"
OUTPUT_WRITE_AP_ZFACTS_NAME = "ap"
OUTPUT_WRITE_B_ZFACTS_NAME = "b"

#
OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma_half"
OUTPUT_WRITE_AP_ZFACTS_HALF_NAME = "ap_half"
OUTPUT_WRITE_B_ZFACTS_HALF_NAME = "b_half"

#
INPUT_ACCEPT_VAR_DIMS = [3,4]
INPUT_ACCEPT_VERT_DIMS = ['plev30', 'plev19', 'plev8','height2m', 'level', 'lev', 'levhalf']

#
OUTPUT_PS_VAR_NAME = 'ps'
OUTPUT_PS_VAR_UNIT = 'Pa'
# ----- \end consts
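
# a minimal sketch, not part of this module, of how the constants above are meant
# to gate the dimension subcases handled in rewrite_netcdf_file_var further below:
#     def which_subcase(var_dim, vert_dim):
#         if var_dim not in INPUT_ACCEPT_VAR_DIMS:
#             raise ValueError(f'var_dim = {var_dim} not accepted')
#         if var_dim == 3:
#             return 'simple 3-dim case'
#         if vert_dim not in INPUT_ACCEPT_VERT_DIMS:
#             raise ValueError(f'vert_dim = {vert_dim} not accepted')
#         if vert_dim in ['plev30', 'plev19', 'plev8', 'height2m']:
#             return 'pressure/height-level subcase'
#         return 'hybrid-sigma subcase'  # 'level', 'lev', 'levhalf'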

### ------ helper functions ------ ###
@@ -135,7 +76,7 @@ def copy_nc(in_nc, out_nc):
# note- totally infuriating...
# the correct value for the format arg is netCDF4.Dataset.data_model
# and NOT netCDF4.Dataset.disk_format
dsout = nc.Dataset(out_nc, "w",
dsout = nc.Dataset(out_nc, 'w',
format = dsin.data_model)

#Copy dimensions
@@ -190,10 +131,10 @@ def get_iso_datetimes(var_filenames, iso_datetime_arr = None):
if iso_datetime_arr is None:
iso_datetime_arr = []
for filename in var_filenames:
iso_datetime = filename.split(".")[1]
iso_datetime = filename.split('.')[1]
if iso_datetime not in iso_datetime_arr:
iso_datetime_arr.append(
filename.split(".")[1] )
filename.split('.')[1] )
iso_datetime_arr.sort()
#print(f"(get_iso_datetimes) Available dates: {iso_datetime_arr}")
if len(iso_datetime_arr) < 1:
@@ -205,9 +146,10 @@ def check_dataset_for_ocean_grid(ds):
one argument. this function has no return.
ds: netCDF4.Dataset object containing variables with associated dimensional information.
'''
if "xh" in list(ds.variables.keys()):
if INPUT_READ_TAG_OCEAN_GRID_VAR in list(ds.variables.keys()):
raise NotImplementedError(
"(check_dataset_for_ocean_grid) 'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.")
"(check_dataset_for_ocean_grid) {INPUT_READ_TAG_OCEAN_GRID_VAR} "
"found in var_list. ocean grid req'd but not yet unimplemented. stop.")


def get_vertical_dimension(ds,target_var):
@@ -226,7 +168,7 @@ def get_vertical_dimension(ds,target_var):
dims = variable.dimensions
for dim in dims:
# if it is not a vertical axis, move on.
if not (ds[dim].axis and ds[dim].axis == "Z"):
if not (ds[dim].axis and ds[dim].axis == INPUT_READ_Z_AXIS_NAME):
continue
vert_dim = dim
return vert_dim
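
# a minimal usage sketch for get_vertical_dimension; the file and variable
# names here are hypothetical:
#     ds = nc.Dataset('atmos_month.196001-196412.ta.nc', 'r')
#     vert_dim = get_vertical_dimension(ds, 'ta')  # name of the axis == 'Z' dimension, if any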
@@ -241,14 +183,14 @@ def create_tmp_dir(outdir):
'''
print(f"(create_tmp_dir) outdir = {outdir}")
tmp_dir = None
if any( [ outdir == "/local2",
outdir.find("/work") != -1,
outdir.find("/net" ) != -1 ] ):
if any( [ outdir == '/local2',
outdir.find('/work') != -1,
outdir.find('/net' ) != -1 ] ):
print(f'(create_tmp_dir) using /local /work /net ( tmp_dir = {outdir}/ )')
tmp_dir = str( Path("{outdir}/").resolve() )
tmp_dir = str( Path(f"{outdir}/").resolve() ) + '/'
else:
print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/tmp/ )')
tmp_dir = str( Path(f"{outdir}/tmp/").resolve() )
print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/{OUTPUT_TEMP_DIR_NAME}/ )')
tmp_dir = str( Path(f"{outdir}/{OUTPUT_TMP_DIR_NAME}/").resolve() ) + '/'
try:
os.makedirs(tmp_dir, exist_ok=True)
except Exception as exc:
@@ -264,12 +206,22 @@ def rewrite_netcdf_file_var ( proj_table_vars = None,
target_var = None,
json_exp_config = None,
json_table_config = None):#, tmp_dir = None ):
''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner.
'''
rewrite the input netcdf file (netcdf_file) containing target_var in a CMIP-compliant manner.
accepts six arguments, all required:
proj_table_vars: json dictionary object, variable table read from json_table_config.
local_var: string, variable name used for finding files locally containing target_var,
this argument is often equal to target_var.
netcdf_file: string, representing path to input netcdf file.
target_var: string, representing the variable name attached to the data object in the netcdf file.
json_exp_config: string, representing path to json configuration file holding metadata for appending to output.
                 this argument is mostly used for making sure the right grid label gets attached to the right output.
json_table_config: string, representing path to json configuration file holding variable names for a given table.
                   proj_table_vars is read from this file, but both are passed anyway.
'''
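# a minimal call sketch; every path and name below is hypothetical:
#     rewrite_netcdf_file_var( proj_table_vars   = proj_table_vars,
#                              local_var         = 'ta',
#                              netcdf_file       = 'atmos_month.196001-196412.ta.nc',
#                              target_var        = 'ta',
#                              json_exp_config   = 'exp_config.json',
#                              json_table_config = 'CMIP6_Amon.json' )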
print('\n\n-------------------------- START rewrite_netcdf_file_var call -----')
print( "(rewrite_netcdf_file_var) input data: " )
print(f" local_var = {local_var}" )
print(f" target_var = {target_var}")
print( "(rewrite_netcdf_file_var) input data: \n"
f" local_var = {local_var}\n"
f" target_var = {target_var}")


# open the input file
@@ -285,23 +237,20 @@ def rewrite_netcdf_file_var ( proj_table_vars = None,
# figure out the dimension names programmatically TODO
# Define lat and lon dimensions
# Assume input file is lat/lon grid
lat = ds["lat"][:]
lon = ds["lon"][:]
lat_bnds = ds["lat_bnds"][:]
lon_bnds = ds["lon_bnds"][:]

## Define time
#time = ds["time"][:]
lat = ds[INPUT_READ_LAT_DIM ][:]
lon = ds[INPUT_READ_LON_DIM ][:]
lat_bnds = ds[INPUT_READ_LAT_BNDS][:]
lon_bnds = ds[INPUT_READ_LON_BNDS][:]

# read in time_coords + units
time_coords = ds["time"][:]
time_coord_units = ds["time"].units
time_coords = ds[INPUT_READ_TIME_DIM][:]
time_coord_units = ds[INPUT_READ_TIME_DIM].units
print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}")

# read in time_bnds , if present
time_bnds = []
try:
time_bnds = ds["time_bnds"][:]
time_bnds = ds[INPUT_READ_TIME_BNDS][:]
#print(f"(rewrite_netcdf_file_var) time_bnds = {time_bnds}")
except ValueError:
print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... moving on")
@@ -319,15 +268,14 @@ def rewrite_netcdf_file_var ( proj_table_vars = None,
print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}")

# Check var_dim
if var_dim not in [3, 4]:
raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.")
if var_dim not in INPUT_ACCEPT_VAR_DIMS:
raise ValueError(f"var_dim is not in {INPUT_ACCEPT_VAR_DIMS}...\n stop.\n")

# Check var_dim and vert_dim and assign lev if relevant.
# error if vert_dim wrong given var_dim
lev = None
if var_dim == 4:
if vert_dim not in [ "plev30", "plev19", "plev8",
"height2m", "level", "lev", "levhalf"] :
if vert_dim not in INPUT_ACCEPT_VERT_DIMS:
raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported')
lev = ds[vert_dim]

@@ -386,50 +334,50 @@ def rewrite_netcdf_file_var ( proj_table_vars = None,

elif vert_dim in ["level", "lev", "levhalf"]:
# find the ps file nearby
ps_file = netcdf_file.replace(f'.{target_var}.nc', '.ps.nc')
ps_file = netcdf_file.replace(f'.{target_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc')
ds_ps = nc.Dataset(ps_file)
ps = ds_ps['ps'][:].copy()
ps = ds_ps[INPUT_READ_PS_FILE_VAR_NAME][:].copy()
ds_ps.close()
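# e.g. a hypothetical 'atmos_month.196001-196412.ta.nc' implies a sibling
# surface-pressure file 'atmos_month.196001-196412.ps.nc'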

# assign levhalf specifics
if vert_dim == "levhalf":
ierr_ap = cmor.zfactor( zaxis_id = cmor_lev,
zfactor_name = "ap_half",
zfactor_name = OUTPUT_WRITE_AP_ZFACTS_HALF_NAME,
axis_ids = [cmor_lev, ],
zfactor_values = ds["ap_bnds"][:],
units = ds["ap_bnds"].units )
zfactor_values = ds[ INPUT_READ_AP_ZFACTS[1] ][:],
units = ds[ INPUT_READ_AP_ZFACTS[1] ].units )
ierr_b = cmor.zfactor( zaxis_id = cmor_lev,
zfactor_name = "b_half",
zfactor_name = OUTPUT_WRITE_B_ZFACTS_HALF_NAME,
axis_ids = [cmor_lev, ],
zfactor_values = ds["b_bnds"][:],
units = ds["b_bnds"].units )
cmor_lev = cmor.axis( "alternate_hybrid_sigma_half",
zfactor_values = ds[ INPUT_READ_B_ZFACTS[1] ][:],
units = ds[ INPUT_READ_B_ZFACTS[1] ].units )
cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME,
coord_vals = lev[:],
units = lev.units )
else:
ierr_ap = cmor.zfactor( zaxis_id = cmor_lev,
zfactor_name = "ap",
zfactor_name = OUTPUT_WRITE_AP_ZFACTS_NAME,
axis_ids = [cmor_lev, ],
zfactor_values = ds["ap"][:],
zfactor_bounds = ds["ap_bnds"][:],
units = ds["ap"].units )
zfactor_values = ds[ INPUT_READ_AP_ZFACTS[0] ][:],
zfactor_bounds = ds[ INPUT_READ_AP_ZFACTS[1] ][:],
units = ds[ INPUT_READ_AP_ZFACTS[0] ].units )
ierr_b = cmor.zfactor( zaxis_id = cmor_lev,
zfactor_name = "b",
zfactor_name = OUTPUT_WRITE_B_ZFACTS_NAME,
axis_ids = [cmor_lev, ],
zfactor_values = ds["b"][:],
zfactor_bounds = ds["b_bnds"][:],
units = ds["b"].units )
cmor_lev = cmor.axis( "alternate_hybrid_sigma",
zfactor_values = ds[ INPUT_READ_B_ZFACTS[0] ][:],
zfactor_bounds = ds[ INPUT_READ_B_ZFACTS[1] ][:],
units = ds[ INPUT_READ_B_ZFACTS[0] ].units )
cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_NAME,
coord_vals = lev[:],
units = lev.units,
cell_bounds = ds[vert_dim+"_bnds"] )

print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n'
f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' )
ips = cmor.zfactor( zaxis_id = cmor_lev,
zfactor_name = "ps",
zfactor_name = OUTPUT_PS_VAR_NAME,
axis_ids = [cmor_time, cmor_lat, cmor_lon],
units = "Pa" )
units = OUTPUT_PS_VAR_UNIT )
save_ps = True
# assign axes at end of 4-dim case
axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon]
@@ -516,8 +464,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None,
copy_nc( nc_fls[i], nc_file_work)

# if the ps file exists, we'll copy it to the work directory too
nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', '.ps.nc')
nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', '.ps.nc')
nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc')
nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc')
if Path(nc_ps_file).exists():
print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}")
copy_nc(nc_ps_file, nc_ps_file_work)