diff --git a/.github/workflows/build_conda.yml b/.github/workflows/build_conda.yml index d9ba9162..e11fb93d 100644 --- a/.github/workflows/build_conda.yml +++ b/.github/workflows/build_conda.yml @@ -11,6 +11,8 @@ jobs: steps: - name: Checkout Files uses: actions/checkout@v4 + with: + submodules: 'recursive' - name: Run Conda to Build run: | conda config --append channels conda-forge diff --git a/.github/workflows/create_test_conda_env.yml b/.github/workflows/create_test_conda_env.yml index ce5de814..24ecfdaf 100644 --- a/.github/workflows/create_test_conda_env.yml +++ b/.github/workflows/create_test_conda_env.yml @@ -7,16 +7,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + submodules: 'recursive' - name: Set up Python uses: actions/setup-python@v5 with: python-version: '>=3.9' - - name: Add conda to system path run: | # $CONDA is an env var pointing to root of miniconda dir echo $CONDA/bin >> $GITHUB_PATH - - name: Create fre-cli environment run: | # create environment containing all dependencies diff --git a/.github/workflows/publish_conda.yml b/.github/workflows/publish_conda.yml index e36a72ea..b7a20fd8 100644 --- a/.github/workflows/publish_conda.yml +++ b/.github/workflows/publish_conda.yml @@ -11,6 +11,8 @@ jobs: steps: - name: Checkout Files uses: actions/checkout@v4 + with: + submodules: 'recursive' - name: Run Conda to Build and Publish run: | conda config --append channels conda-forge diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..e5414cde --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "fre/gfdl_msd_schemas"] + path = fre/gfdl_msd_schemas + url = https://github.com/NOAA-GFDL/gfdl_msd_schemas diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a7bd7b6c..c37953c9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,7 @@ ## **For Developers** * Developers are free to use this repository's `README.md` to familiarize with the CLI and save time from having to install any dependencies, but development within a Conda environment is heavily recommended regardless -* Gain access to the repository with `git clone git@github.com:NOAA-GFDL/fre-cli.git` or your fork's link (recommended) and an SSH RSA key +* Gain access to the repository with `git clone --recursive git@github.com:NOAA-GFDL/fre-cli.git` or your fork's link (recommended) and an SSH RSA key - Once inside the repository, developers can test local changes by running a `pip install .` inside of the root directory to install the fre-cli package locally with the newest local changes on top of the installed Conda fre-cli dependencies - Test as a normal user would use the CLI * Create a GitHub issue to reflect your contribution's background and reference it with Git commits diff --git a/docs/usage.rst b/docs/usage.rst index 95afbe5a..b0fa001c 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -87,7 +87,7 @@ Usage (Developers) Developers are free to use the user guide above to familiarize with the CLI and save time from having to install any dependencies, but development within a Conda environment is heavily recommended regardless -Gain access to the repository with *git clone git@github.com:NOAA-GFDL/fre-cli.git* or your fork's link (recommended) and an SSH RSA key +Gain access to the repository with *git clone --recursive git@github.com:NOAA-GFDL/fre-cli.git* or your fork's link (recommended) and an SSH RSA key Once inside the repository, developers can test local changes by running a *pip install .* inside of the root directory to install the fre-cli 
package locally with the newest local changes diff --git a/fre/cmor/README.md b/fre/cmor/README.md index 8f5179c6..865c1ee7 100644 --- a/fre/cmor/README.md +++ b/fre/cmor/README.md @@ -29,18 +29,39 @@ this subtool's help, and command-specific `run` help: # subtool command-specific help, e.g. for run -> fre cmor run --help - Usage: fre cmor run [OPTIONS] - - Rewrite climate model output - - Options: - -d, --indir TEXT Input directory [required] - -l, --varlist TEXT Variable list [required] - -r, --table_config TEXT Table configuration [required] - -p, --exp_config TEXT Experiment configuration [required] - -o, --outdir TEXT Output directory [required] - --help Show this message and exit. +> fre cmor run --help +Usage: fre cmor run [OPTIONS] + + Rewrite climate model output files with CMIP-compliant metadata for down- + stream publishing + +Options: + -d, --indir TEXT directory containing netCDF files. keys specified + in json_var_list are local variable names used for + targeting specific files in this directory + [required] + -l, --varlist TEXT path pointing to a json file containing dictionary + of key/value pairs. the keys are the 'local' names + used in the filename, and the values pointed to by + those keys are strings representing the name of the + variable contained in targeted files. the key and + value are often the same, but it is not required. + [required] + -r, --table_config TEXT json file containing CMIP-compliant per- + variable/metadata for specific MIP table. The MIP + table can generally be identified by the specific + filename (e.g. 'Omon') [required] + -p, --exp_config TEXT json file containing metadata dictionary for + CMORization. this metadata is effectively appended + to the final output file's header [required] + -o, --outdir TEXT directory root that will contain the full output + and output directory structure generated by the + cmor module upon request. [required] + -v, --opt_var_name TEXT optional, specify a variable name to specifically + process only filenames matching that variable name. + I.e., this string helps target local_vars, not + target_vars. + --help Show this message and exit. ``` diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index dd0556f2..0f4ef243 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -5,6 +5,7 @@ ''' import os +import glob import json import subprocess from pathlib import Path @@ -13,8 +14,8 @@ import click import cmor - # ----- \start consts +DEBUG_MODE_RUN_ONE = True # ----- \end consts @@ -26,8 +27,8 @@ def copy_nc(in_nc, out_nc): in_nc: string, path to an input netcdf file we wish to copy out_nc: string, an output path to copy the targeted input netcdf file to ''' - print(f'(copy_nc) in_nc: {in_nc}') - print(f'(copy_nc) out_nc: {out_nc}') + print(f'(copy_nc) in_nc: {in_nc}\n' + f' out_nc: {out_nc}') # input file dsin = nc.Dataset(in_nc) @@ -56,22 +57,24 @@ def copy_nc(in_nc, out_nc): dsout.close() -def get_var_filenames(indir, var_filenames = None): +def get_var_filenames(indir, var_filenames = None, local_var = None): ''' - appends files ending in .nc located within indir to list var_filenames accepts two arguments + appends files ending in .nc located within indir to list var_filenames. accepts three arguments: indir: string, representing a path to a directory containing files ending in .nc extension var_filenames: list of strings, empty or non-empty, to append discovered filenames to. the object pointed to by the reference var_filenames is manipulated, and so need not be returned.
+ local_var: string, optional, if not None, will be used for filtering filename targets ''' if var_filenames is None: var_filenames = [] - var_filenames_all = os.listdir(indir) + filename_pattern='.nc' if local_var is None else f'.{local_var}.nc' + print(f'(get_var_filenames) filename_pattern={filename_pattern}') + var_filenames_all=glob.glob(f'{indir}/*{filename_pattern}') print(f'(get_var_filenames) var_filenames_all={var_filenames_all}') for var_file in var_filenames_all: - if var_file.endswith('.nc'): - var_filenames.append(var_file) - #print(f"(get_var_filenames) var_filenames = {var_filenames}") + var_filenames.append( Path(var_file).name ) + print(f"(get_var_filenames) var_filenames = {var_filenames}") if len(var_filenames) < 1: raise ValueError(f'target directory had no files with .nc ending. indir =\n {indir}') var_filenames.sort() @@ -96,7 +99,7 @@ def get_iso_datetimes(var_filenames, iso_datetime_arr = None): iso_datetime_arr.sort() #print(f"(get_iso_datetimes) Available dates: {iso_datetime_arr}") if len(iso_datetime_arr) < 1: - raise ValueError('ERROR: iso_datetime_arr has length 0!') + raise ValueError('(get_iso_datetimes) ERROR: iso_datetime_arr has length 0!') def check_dataset_for_ocean_grid(ds): ''' @@ -104,12 +107,10 @@ one argument. this function has no return. ds: netCDF4.Dataset object containing variables with associated dimensional information. ''' - #print(f'(check_dataset_for_ocean_grid) {ds}') - #print(f'(check_dataset_for_ocean_grid) {ds.variables}') - #print(f'(check_dataset_for_ocean_grid) {ds.variables.keys()}') if "xh" in list(ds.variables.keys()): raise NotImplementedError( - "'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.") + "(check_dataset_for_ocean_grid) 'xh' found in var_list. ocean grid req'd but not yet implemented. stop.") + def get_vertical_dimension(ds,target_var): ''' @@ -127,12 +128,14 @@ dims = variable.dimensions for dim in dims: # if it is not a vertical axis, move on. + print(f'(get_vertical_dimension) dim={dim}') + if dim == 'landuse': + continue if not (ds[dim].axis and ds[dim].axis == "Z"): continue vert_dim = dim return vert_dim - def create_tmp_dir(outdir): ''' @@ -141,20 +144,20 @@ creates a tmp_dir based on targeted output directory root. returns the name of the tmp dir. file output. tmp_dir will be slightly different depending on the output directory targeted - print(f"(cmorize_target_var_files) outdir = {outdir}") + print(f"(create_tmp_dir) outdir = {outdir}") tmp_dir = None if any( [ outdir == "/local2", outdir.find("/work") != -1, outdir.find("/net" ) != -1 ] ): - print(f'(cmorize_target_var_files) using /local /work /net ( tmp_dir = {outdir}/ )') - tmp_dir = "{outdir}/" + print(f'(create_tmp_dir) using /local /work /net ( tmp_dir = {outdir}/ )') + tmp_dir = str( Path(f"{outdir}/").resolve() ) else: - print('(cmorize_target_var_files) NOT using /local /work /net (tmp_dir = outdir/tmp/ )') - tmp_dir = f"{outdir}/tmp/" + print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/tmp/ )') + tmp_dir = str( Path(f"{outdir}/tmp/").resolve() ) try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: - raise OSError('problem creating temp output directory. stop.') from exc + raise OSError('(create_tmp_dir) problem creating temp output directory. 
stop.') from exc return tmp_dir @@ -166,12 +169,23 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, target_var = None, json_exp_config = None, json_table_config = None):#, tmp_dir = None ): - ''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + ''' + rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + accepts six arguments, all required: + proj_table_vars: json dictionary object, variable table read from json_table_config. + local_var: string, variable name used for finding files locally containing target_var, + this argument is often equal to target_var. + netcdf_file: string, representing path to input netcdf file. + target_var: string, representing the variable name attached to the data object in the netcdf file. + json_exp_config: string, representing path to json configuration file holding metadata for appending to output + this argument is mostly used for making sure the right grid label is getting attached to the right output + json_table_config: string, representing path to json configuration file holding variable names for a given table. + proj_table_vars is read from this file, but both are passed anyways. ''' print('\n\n-------------------------- START rewrite_netcdf_file_var call -----') print( "(rewrite_netcdf_file_var) input data: " ) - print(f"(rewrite_netcdf_file_var) local_var = {local_var}" ) - print(f"(rewrite_netcdf_file_var) target_var = {target_var}") + print(f" local_var = {local_var}" ) + print(f" target_var = {target_var}") # open the input file @@ -209,24 +223,23 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... moving on") - - - - # read the input... units? + # read the input variable data var = ds[target_var][:] - # determine the vertical dimension by looping over netcdf variables - vert_dim = get_vertical_dimension(ds,target_var) #0#vert_dim = None + vert_dim = get_vertical_dimension(ds, target_var) print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") - - # Check var_dim, vert_dim + # grab var_dim var_dim = len(var.shape) + print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") + + # Check var_dim if var_dim not in [3, 4]: raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") - # check for vert_dim error condition. if pass, assign lev for later use. + # Check var_dim and vert_dim and assign lev if relevant. 
+ # error if vert_dim wrong given var_dim lev = None if var_dim == 4: if vert_dim not in [ "plev30", "plev19", "plev8", @@ -234,9 +247,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] - print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") - - # now we set up the cmor module object # initialize CMOR @@ -249,14 +259,13 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ) # read experiment configuration file + print(f"(rewrite_netcdf_file_var) cmor is opening: json_exp_config = {json_exp_config}") cmor.dataset_json(json_exp_config) - print(f"(rewrite_netcdf_file_var) json_exp_config = {json_exp_config}") - print(f"(rewrite_netcdf_file_var) json_table_config = {json_table_config}") - # load variable list (CMOR table) + # load CMOR table + print(f"(rewrite_netcdf_file_var) cmor is opening json_table_config = {json_table_config}") cmor.load_table(json_table_config) - #units = proj_table_vars["variable_entry"] [local_var] ["units"] units = proj_table_vars["variable_entry"] [target_var] ["units"] print(f"(rewrite_netcdf_file_var) units={units}") @@ -264,15 +273,14 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") try: print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" - f"(rewrite_netcdf_file_var) coord_vals = \n{time_coords}, \n" - f"(rewrite_netcdf_file_var) cell_bounds = time_bnds, units = {time_coord_units}) ") + f" coord_vals = \n{time_coords}, \n" + f" cell_bounds = time_bnds, units = {time_coord_units}) ") cmor_time = cmor.axis("time", coord_vals = time_coords, cell_bounds = time_bnds, units = time_coord_units) - #cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) except ValueError as exc: - print(f"(rewrite_netcdf_file_var) WARNING exception raised... exc={exc}") - print( "(rewrite_netcdf_file_var) cmor_time = cmor.axis('time', " - "coord_vals = time_coords, units = time_coord_units)") + print(f"(rewrite_netcdf_file_var) WARNING exception raised... 
exc={exc}\n" + " cmor_time = cmor.axis('time', \n" + " coord_vals = time_coords, units = time_coord_units)") cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) # initializations @@ -300,7 +308,10 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ds_ps.close() # assign lev_half specifics - if vert_dim == "lev_half": + if vert_dim == "levhalf": + cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", + coord_vals = lev[:], + units = lev.units ) ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ap_half", axis_ids = [cmor_lev, ], @@ -311,10 +322,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, axis_ids = [cmor_lev, ], zfactor_values = ds["b_bnds"][:], units = ds["b_bnds"].units ) - cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", - coord_vals = lev[:], - units = lev.units ) else: + cmor_lev = cmor.axis( "alternate_hybrid_sigma", + coord_vals = lev[:], + units = lev.units, + cell_bounds = ds[vert_dim+"_bnds"] ) ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ap", axis_ids = [cmor_lev, ], @@ -327,13 +339,9 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, zfactor_values = ds["b"][:], zfactor_bounds = ds["b_bnds"][:], units = ds["b"].units ) - cmor_lev = cmor.axis( "alternate_hybrid_sigma", - coord_vals = lev[:], - units = lev.units, - cell_bounds = ds[vert_dim+"_bnds"] ) - print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}') - print(f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}') + print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n' + f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' ) ips = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ps", axis_ids = [cmor_time, cmor_lat, cmor_lon], @@ -355,9 +363,9 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, cmor.write(cmor_var, var) if save_ps: if any( [ ips is None, ps is None ] ): - print( 'WARNING: ps or ips is None!, but save_ps is True!') - print(f'ps = {ps}, ips = {ips}') - print( 'skipping ps writing!') + print( '(rewrite_netcdf_file_var) WARNING: ps or ips is None!, but save_ps is True!\n' + f' ps = {ps}, ips = {ips}\n' + ' skipping ps writing!' ) else: cmor.write(ips, ps, store_with = cmor_var) cmor.close(ips, file_name = True, preserve = False) @@ -394,25 +402,22 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, ''' print('\n\n-------------------------- START cmorize_target_var_files call -----') - print(f"(cmorize_target_var_files) local_var = {local_var} to be used for file-targeting.") - print(f"(cmorize_target_var_files) target_var = {target_var} to be used for reading the data " - "from the file") - print(f"(cmorize_target_var_files) outdir = {outdir}") + print(f"(cmorize_target_var_files) local_var = {local_var} to be used for file-targeting.\n" + f" target_var = {target_var} to be used for reading the data \n" + " from the file\n" + f" outdir = {outdir}") #determine a tmp dir for working on files. - tmp_dir = create_tmp_dir( outdir ) + tmp_dir = create_tmp_dir( outdir ) + '/' print(f'(cmorize_target_var_files) will use tmp_dir={tmp_dir}') - print("\n\n==== begin (???) mysterious file movement ====================================") # loop over sets of dates, each one pointing to a file nc_fls = {} for i, iso_datetime in enumerate(iso_datetime_arr): - # why is nc_fls a filled list/array/object thingy here? 
see above line - #nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{target_var}.nc" nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{local_var}.nc" print(f"(cmorize_target_var_files) input file = {nc_fls[i]}") if not Path(nc_fls[i]).exists(): @@ -421,8 +426,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, # create a copy of the input file with local var name into the work directory - #nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{target_var}.nc" nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{local_var}.nc" + print(f"(cmorize_target_var_files) nc_file_work = {nc_file_work}") copy_nc( nc_fls[i], nc_file_work) @@ -434,20 +439,35 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, copy_nc(nc_ps_file, nc_ps_file_work) + # TODO think of better way to write this kind of conditional data movement... # now we have a file in our targets, point CMOR to the configs and the input file(s) + make_cmor_write_here = None + print( Path( tmp_dir ) ) + print( Path( os.getcwd() ) ) + if Path( tmp_dir ).is_absolute(): + print('tmp_dir is absolute') + make_cmor_write_here = tmp_dir + elif Path( tmp_dir ).exists(): # relative to where we are + print('tmp_dir is relative to CWD!') + make_cmor_write_here = os.getcwd() + '/'+tmp_dir # unavoidable, cmor module FORCES write to CWD + assert make_cmor_write_here is not None + + gotta_go_back_here=os.getcwd() + try: + print(f"cd'ing to \n {make_cmor_write_here}" ) + os.chdir( make_cmor_write_here ) + except Exception as exc: + raise OSError(f'could not chdir to {make_cmor_write_here}') from exc + print ("(cmorize_target_var_files) calling rewrite_netcdf_file_var") - gotta_go_back_here=os.getcwd()+'/' - os.chdir(gotta_go_back_here+tmp_dir) # this is unavoidable, cmor module FORCES write to CWD - local_file_name = rewrite_netcdf_file_var( proj_table_vars , - local_var , - gotta_go_back_here + nc_file_work , - target_var , - gotta_go_back_here + json_exp_config , - gotta_go_back_here + json_table_config)#, -# gotta_go_back_here + tmp_dir ) - os.chdir(gotta_go_back_here) - assert Path( gotta_go_back_here+tmp_dir+local_file_name ).exists() - #assert False + local_file_name = rewrite_netcdf_file_var( proj_table_vars , + local_var , + nc_file_work , + target_var , + json_exp_config , + json_table_config ) + os.chdir( gotta_go_back_here ) + # now that CMOR has rewritten things... we can take our post-rewriting actions # the final output filename will be... @@ -465,11 +485,11 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, print(f'(cmorize_target_var_files) WARNING: directory {filedir} already exists!') # hmm.... this is making issues for pytest - mv_cmd = f"mv {tmp_dir}{local_file_name} {filedir}" + mv_cmd = f"mv {tmp_dir}/{local_file_name} {filedir}" print(f"(cmorize_target_var_files) moving files...\n {mv_cmd}") - subprocess.run(mv_cmd, shell=True, check=True) + subprocess.run(mv_cmd, shell = True, check = True) - # ------ refactor this into function? #TODO - # ------ what is the use case for this logic really?? 
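+ # best guess at the intent: the just-moved output filename normally ends in a six-digit YYYYMM-style chunk tag, and the rename below splices the iso_datetime back into the name whenever that tag is absent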
filename_no_nc = filename[:filename.rfind(".nc")] chunk_str = filename_no_nc[-6:] @@ -479,7 +499,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, filename_corr = "{filename[:filename.rfind('.nc')]}_{iso_datetime}.nc" mv_cmd = f"mv {filename} {filename_corr}" print(f"(cmorize_target_var_files) moving files, strange chunkstr logic...\n {mv_cmd}") - subprocess.run(mv_cmd, shell=True, check=True) + subprocess.run(mv_cmd, shell = True, check = True) # ------ end refactor this into function? # delete files in work dirs @@ -489,6 +509,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, if Path(nc_ps_file_work).exists(): Path(nc_ps_file_work).unlink() + if DEBUG_MODE_RUN_ONE: + print('WARNING: DEBUG_MODE_RUN_ONE is True!!!!') + print('WARNING: done processing one file!!!') + break @@ -497,9 +521,10 @@ def cmor_run_subtool( indir = None, json_var_list = None, json_table_config = None, json_exp_config = None , - outdir = None): + outdir = None, opt_var_name = None + ): ''' - primary steering function for the cmor_mixer tool, i.e essentially main. Accepts five args: + primary steering function for the cmor_mixer tool, i.e. essentially main. Accepts six args: indir: string, directory containing netCDF files. keys specified in json_var_list are local variable names used for targeting specific files json_var_list: string, path pointing to a json file containing directory of key/value @@ -510,18 +535,23 @@ def cmor_run_subtool( indir = None, json_table_config: json file containing CMIP-compliant per-variable/metadata for specific MIP table. The MIP table can generally be identified by the specific filename (e.g. "Omon") - json_exp_config: json file containing other configuration details (FILL IN TO DO #TODO) + json_exp_config: json file containing metadata dictionary for CMORization. this metadata is effectively + appended to the final output file's header outdir: string, directory root that will contain the full output and output directory structure generated by the cmor module upon request. + opt_var_name: string, optional, specify a variable name to specifically process only filenames matching + that variable name. I.e., this string helps target local_vars, not target_vars. 
''' + # check req'd inputs if None in [indir, json_var_list, json_table_config, json_exp_config, outdir]: - raise ValueError(f'all input arguments are required!\n' - '[indir, json_var_list, json_table_config, json_exp_config, outdir] = \n' - f'[{indir}, {json_var_list}, {json_table_config}, ' - '{json_exp_config}, {outdir}]' ) + raise ValueError(f'(cmor_run_subtool) all input arguments except opt_var_name are required!\n' + ' [indir, json_var_list, json_table_config, json_exp_config, outdir] = \n' + f' [{indir}, {json_var_list}, {json_table_config}, ' + f' {json_exp_config}, {outdir}]' ) # open CMOR table config file - print('(cmor_run_subtool) getting table variables from json_table_config') + print( '(cmor_run_subtool) getting table variables from json_table_config = \n' + f' {json_table_config}' ) try: with open( json_table_config, "r", encoding = "utf-8") as table_config_file: proj_table_vars=json.load(table_config_file) @@ -531,6 +561,9 @@ def cmor_run_subtool( indir = None, f'ERROR: json_table_config file cannot be opened.\n' f' json_table_config = {json_table_config}' ) from exc + # now resolve the json_table_config path after confirming it can be opened + json_table_config= str( Path(json_table_config).resolve() ) + # open input variable list print('(cmor_run_subtool) opening variable list json_var_list') try: @@ -542,41 +575,58 @@ def cmor_run_subtool( indir = None, f'ERROR: json_var_list file cannot be opened.\n' f' json_var_list = {json_var_list}' ) from exc - # examine input directory to obtain a list of input file targets - var_filenames = [] - get_var_filenames(indir, var_filenames) - print(f"(cmor_run_subtool) found filenames = \n {var_filenames}") - - # examine input files to obtain target date ranges - iso_datetime_arr = [] - get_iso_datetimes(var_filenames, iso_datetime_arr) - print(f"(cmor_run_subtool) found iso datetimes = \n {iso_datetime_arr}") - - # name_of_set == component label... - # which is not relevant for CMOR/CMIP... or is it? - name_of_set = var_filenames[0].split(".")[0] - print(f"(cmor_run_subtool) setting name_of_set = {name_of_set}") + # make sure the exp config exists too while we're at it... + if Path(json_exp_config).exists(): # if so, resolve to absolute path + json_exp_config = str( Path( json_exp_config).resolve() ) + else: + raise FileNotFoundError( + f'ERROR: json_exp_config file cannot be opened.\n' + f' json_exp_config = {json_exp_config}' ) # loop over entries in the json_var_list, read into var_list for local_var in var_list: # if its not in the table configurations variable_entry list, skip if var_list[local_var] not in proj_table_vars["variable_entry"]: - print(f"(cmor_run_subtool) WARNING: skipping local_var={local_var} /" - f" target_var={target_var}") - print( "(cmor_run_subtool) ... target_var not found in CMOR variable group") + print(f"(cmor_run_subtool) WARNING: skipping local_var = {local_var} /\n" + f" target_var = {var_list[local_var]}\n" + " ... target_var not found in CMOR variable group") + continue + + if all( [ opt_var_name is not None, + local_var != opt_var_name ] ): + print(f'(cmor_run_subtool) WARNING: skipping local_var={local_var} as it is not equal\n' + ' to the opt_var_name argument.') continue # it is in there, get the name of the data inside the netcdf file. target_var=var_list[local_var] # often equiv to local_var but not necessarily. 
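+ # n.b. the two names play different roles below: local_var is matched against filenames, while target_var is what actually gets read out of each file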
if local_var != target_var: - print(f'(cmor_run_subtool) WARNING: local_var == {local_var} ' - f'!= {target_var} == target_var') - print(f'i am expecting {local_var} to be in the filename, and i expect the variable' - f' in that file to be {target_var}') + print(f'(cmor_run_subtool) WARNING: local_var == {local_var} \n' + f' != {target_var} == target_var\n' + f' i am expecting {local_var} to be in the filename, and i expect the variable\n' + f' in that file to be {target_var}') + + + # examine input directory to obtain a list of input file targets + var_filenames = [] + get_var_filenames(indir, var_filenames, local_var) + print(f"(cmor_run_subtool) found filenames = \n {var_filenames}") - print(f'(cmor_run_subtool) ..............beginning CMORization for {local_var}/' - f'{target_var}..........') + # examine input files to obtain target date ranges + iso_datetime_arr = [] + get_iso_datetimes(var_filenames, iso_datetime_arr) + print(f"(cmor_run_subtool) found iso datetimes = \n {iso_datetime_arr}") + + # name_of_set == component label... + # which is not relevant for CMOR/CMIP... or is it? + name_of_set = var_filenames[0].split(".")[0] + print(f"(cmor_run_subtool) setting name_of_set = {name_of_set}") + + + + print(f'(cmor_run_subtool) ..............beginning CMORization for {local_var}/\n' + f' {target_var}..........') cmorize_target_var_files( indir, target_var, local_var, iso_datetime_arr, # OK name_of_set, json_exp_config, @@ -584,13 +634,18 @@ def cmor_run_subtool( indir = None, proj_table_vars, json_table_config # a little redundant ) + if DEBUG_MODE_RUN_ONE: + print('WARNING: DEBUG_MODE_RUN_ONE is True. breaking var_list loop') + break + return 0 + @click.command() def _cmor_run_subtool(indir = None, json_var_list = None, json_table_config = None, json_exp_config = None, - outdir = None): + outdir = None, opt_var_name = None): ''' entry point to fre cmor run for click. see cmor_run_subtool for argument descriptions.''' - return cmor_run_subtool(indir, json_var_list, json_table_config, json_exp_config, outdir) + return cmor_run_subtool(indir, json_var_list, json_table_config, json_exp_config, outdir, opt_var_name) if __name__ == '__main__': diff --git a/fre/cmor/frecmor.py b/fre/cmor/frecmor.py index f764a546..e882186a 100644 --- a/fre/cmor/frecmor.py +++ b/fre/cmor/frecmor.py @@ -11,35 +11,53 @@ def cmor_cli(): @cmor_cli.command() @click.option("-d", "--indir", type=str, - help="Input directory", + help="directory containing netCDF files. keys specified in json_var_list are local " + \ + "variable names used for targeting specific files in this directory", required=True) @click.option("-l", "--varlist", type=str, - help="Variable list", + help="path pointing to a json file containing dictionary of key/value pairs. " + \ + "the keys are the \'local\' names used in the filename, and the values " + \ + "pointed to by those keys are strings representing the name of the variable " + \ + "contained in targeted files. the key and value are often the same, " + \ + "but it is not required.", required=True) @click.option("-r", "--table_config", type=str, - help="Table configuration", + help="json file containing CMIP-compliant per-variable/metadata for specific " + \ + "MIP table. The MIP table can generally be identified by the specific " + \ + "filename (e.g. \'Omon\')", required=True) @click.option("-p", "--exp_config", type=str, - help="Experiment configuration", + help="json file containing metadata dictionary for CMORization. 
this metadata is " + \ + "effectively appended to the final output file's header", required=True) @click.option("-o", "--outdir", type=str, - help="Output directory", + help="directory root that will contain the full output and output directory " + \ + "structure generated by the cmor module upon request.", required=True) +@click.option('-v', "--opt_var_name", + type = str, + help="optional, specify a variable name to specifically process only filenames " + \ + "matching that variable name. I.e., this string helps target local_vars, not " + \ + "target_vars.", + required=False) @click.pass_context -def run(context, indir, varlist, table_config, exp_config, outdir): +def run(context, indir, varlist, table_config, exp_config, outdir, opt_var_name): # pylint: disable=unused-argument - """Rewrite climate model output""" + """ + Rewrite climate model output files with CMIP-compliant metadata for down-stream publishing + """ context.invoke( _cmor_run_subtool, indir = indir, json_var_list = varlist, json_table_config = table_config, json_exp_config = exp_config, - outdir = outdir + outdir = outdir, + opt_var_name = opt_var_name ) # context.forward( # _cmor_run_subtool() ) diff --git a/fre/gfdl_msd_schemas b/fre/gfdl_msd_schemas new file mode 160000 index 00000000..04c8150b --- /dev/null +++ b/fre/gfdl_msd_schemas @@ -0,0 +1 @@ +Subproject commit 04c8150bc362304d82e60e765405135460b69f06 diff --git a/fre/make/fremake.py b/fre/make/fremake.py index 2020e5b0..f78f1773 100644 --- a/fre/make/fremake.py +++ b/fre/make/fremake.py @@ -86,7 +86,7 @@ def make_cli(): @click.pass_context def run_fremake(context, yamlfile, platform, target, parallel, jobs, no_parallel_checkout, verbose, force_checkout, force_compile): """ - Perform all fremake functions to run checkout and compile model""" - context.forward(runfremake._fremake_run) + context.forward(runFremake._fremake_run) #### @make_cli.command() diff --git a/fre/make/gfdlfremake/checkout.py b/fre/make/gfdlfremake/checkout.py index 9afd0ef9..d3fa5d4e 100644 --- a/fre/make/gfdlfremake/checkout.py +++ b/fre/make/gfdlfremake/checkout.py @@ -111,7 +111,7 @@ def finish (self,pc): ## TODO: batch script building def run (self): """ - Brief: Changes the permission on the checkout script and runs it + Brief: Runs the checkout script Param: - self The checkout script object """ diff --git a/fre/make/gfdlfremake/platformfre.py b/fre/make/gfdlfremake/platformfre.py index fe8924f9..4f8d0eed 100644 --- a/fre/make/gfdlfremake/platformfre.py +++ b/fre/make/gfdlfremake/platformfre.py @@ -52,7 +52,7 @@ def __init__(self,platforminfo): p["container"] except: p["container"] = False - p["RUNenv"] = "" + p["RUNenv"] = [""] p["containerBuild"] = "" p["containerRun"] = "" if p["container"]: diff --git a/fre/make/gfdlfremake/schema.json b/fre/make/gfdlfremake/schema.json deleted file mode 100644 index 751bb9db..00000000 --- a/fre/make/gfdlfremake/schema.json +++ /dev/null @@ -1,201 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-06/schema#", - "type": "object", - "additionalProperties": false, - "properties": { - "name": { - "description": "The name of the experiment", - "type": "string" - }, - "platform": { - "description": "The platforms listed in the command", - "type": "string" - }, - "target": { - "description": "The targets listed in the command", - "type": "string" - }, - "build": { - "type": "object", - "additionalProperties": false, - "properties": { - "compileYaml": { - "description": "Path to the compile yaml.", - "type": "string" - }, - "platformYaml": { 
"description": "Path to the platform yaml.", - "type": "string" - } - } - }, - "compile": { - "description": "The source code descriptions", - "$ref": "#/definitions/Compile" - }, - "platforms": { - "description": "FRE platforms", - "type": "array", - "items": {"$ref": "#/definitions/Platform"} - } - }, - "definitions": { - "Compile": { - "type": "object", - "properties": { - "experiment": { - "description": "The name of the model", - "type": "string" - }, - "container_addlibs": { - "description": "Libraries and packages needed for linking in the container", - "type": ["array","string","null"] - }, - "baremetal_linkerflags": { - "description": "Linker flags of libraries and packages needed for linking in the bare-metal build", - "type": ["array","string","null"] - }, - "src": { - "type": "array", - "items": {"$ref": "#/definitions/Src"} - } - } - }, - "Src": { - "type": "object", - "properties": { - "component": { - "description": "The name of the model component", - "type": "string" - }, - "repo": { - "anyOf": [ - { - "description": "The URL of the code repository", - "type": "array", - "items": { - "type": "string", - "format": "uri", - "qt-uri-protocols": [ - "https" - ], - "qt-uri-extensions": [ - ".git" - ] - } - }, - { - "description": "The URL of the code repository", - "type": "string", - "format": "uri", - "qt-uri-protocols": [ - "https" - ], - "qt-uri-extensions": [ - ".git" - ] - } - ] - }, - "cppdefs": { - "description": "String of CPPDEFs to include in compiling the component", - "type": "string" - }, - "branch": { - "anyOf": [ - { - "description": "The version of code to clone", - "type": "array", - "items": { - "type": "string" - } - }, - { - "description": "The version of code to clone", - "type": "string" - } - ] - }, - "otherFlags": { - "description": "String of Include flags necessary to retrieve other code needed", - "type": "string" - }, - "requires": { - "description": "list of componets that this component depends on", - "type": "array", - "items": {"type": "string"} - }, - "paths": { - "description": "A list of the paths in the component to compile", - "type": "array", - "items": {"type": "string"} - }, - "doF90Cpp": { - "description": "True if the preprocessor needs to be run", - "type": "boolean" - }, - "makeOverrides": { - "description": "Overrides openmp target for MOM6", - "type": "string" - } - } - }, - "Platform": { - "type": "object", - "properties": { - "name": { - "description": "The name of the platform", - "type": "string" - }, - "compiler": { - "description": "The compiler used to build the model", - "type": "string" - }, - "modulesInit": { - "description": "Array of commands to run before loading modules", - "type": "array", - "items": {"type": "string"} - }, - "modules": { - "description": "List (array) of modules to load", - "type": "array", - "items": { - "type": "string" - } - }, - "fc": { - "description": "The Fortran compiler", - "type": "string" - }, - "cc": { - "description": "The C compiler", - "type": "string" - }, - "mkTemplate": { - "description": "Path to the mk template file", - "type": "string" - }, - "modelRoot": { - "description": "Path to the root for all model install files", - "type": "string" - }, - "RUNenv": { - "description": "Commands needed at the beginning of a RUN in dockerfile", - "type": ["array","string"] - }, - "container": { - "description": "True/False if using container to compile", - "type": "boolean" - }, - "containerBuild": { - "description": "Program used to build the container", - "type": "string" - }, - 
"containerRun": { - "description": "Program used to run the container", - "type": "string" - } - } - } - } -} diff --git a/fre/make/gfdlfremake/yamlfre.py b/fre/make/gfdlfremake/yamlfre.py index 6f638bbb..72458a8c 100644 --- a/fre/make/gfdlfremake/yamlfre.py +++ b/fre/make/gfdlfremake/yamlfre.py @@ -1,5 +1,6 @@ import os import json +from pathlib import Path import yaml from jsonschema import validate, ValidationError, SchemaError from . import platformfre @@ -176,8 +177,8 @@ def __init__(self,combinedyaml,v): #self.freyaml.update(self.platformsyaml) ## VALIDATION OF COMBINED YAML FOR COMPILATION - fremake_package_dir = os.path.dirname(os.path.abspath(__file__)) - schema_path = os.path.join(fremake_package_dir, 'schema.json') + fremake_package_dir = Path(__file__).resolve().parents[2] + schema_path = os.path.join(fremake_package_dir, 'gfdl_msd_schemas', 'FRE', 'fre_make.json') with open(schema_path, 'r') as f: s = f.read() schema = json.loads(s) diff --git a/fre/make/runFremake.py b/fre/make/runFremake.py index f96348c0..00c7e039 100644 --- a/fre/make/runFremake.py +++ b/fre/make/runFremake.py @@ -86,6 +86,7 @@ def fremake_run(yamlfile,platform,target,parallel,jobs,no_parallel_checkout,verb freCheckout = checkout.checkout("checkout.sh",srcDir) freCheckout.writeCheckout(modelYaml.compile.getCompileYaml(),jobs,pc) freCheckout.finish(pc) + os.chmod(srcDir+"/checkout.sh", 0o744) ## TODO: Options for running on login cluster? freCheckout.run() diff --git a/fre/make/tests/test_create_makefile.py b/fre/make/tests/test_create_makefile.py new file mode 100644 index 00000000..36188b33 --- /dev/null +++ b/fre/make/tests/test_create_makefile.py @@ -0,0 +1,72 @@ +""" +Test fre make create-makefile +""" +import os +import shutil +from pathlib import Path +from fre.make import createMakefile + +# SET-UP +test_dir = Path("fre/make/tests") +NM_EXAMPLE = Path("null_example") +YAMLFILE = "null_model.yaml" +BM_PLATFORM = ["ncrc5.intel23"] +CONTAINER_PLATFORM = ["hpcme.2023"] +TARGET = ["debug"] +EXPERIMENT = "null_model_full" + +# Create output location +out = f"{test_dir}/makefile_out" +if Path(out).exists(): + # remove + shutil.rmtree(out) + # create output directory + Path(out).mkdir(parents=True,exist_ok=True) +else: + Path(out).mkdir(parents=True,exist_ok=True) + +# Set output directory as home for fre make output +#os.environ["HOME"]=str(Path(out)) + +def test_modelyaml_exists(): + """ + Check the model yaml exists + """ + assert Path(f"{test_dir}/{NM_EXAMPLE}/{YAMLFILE}").exists() + +def test_compileyaml_exists(): + """ + Check the compile yaml exists + """ + assert Path(f"{test_dir}/{NM_EXAMPLE}/compile.yaml").exists() + +def test_platformyaml_exists(): + """ + Check the platform yaml exists + """ + assert Path(f"{test_dir}/{NM_EXAMPLE}/platforms.yaml").exists() + +def test_bm_makefile_creation(): + """ + Check the makefile is created when a bare-metal platform is used + """ + # Set output directory as home for fre make output + os.environ["HOME"]=str(Path(out)) + + bm_plat = BM_PLATFORM[0] + targ = TARGET[0] + yamlfile_path = f"{test_dir}/{NM_EXAMPLE}/{YAMLFILE}" + + createMakefile.makefile_create(yamlfile_path,BM_PLATFORM,TARGET) + + assert Path(f"{out}/fremake_canopy/test/{EXPERIMENT}/{bm_plat}-{targ}/exec/Makefile").exists() + +def test_container_makefile_creation(): + """ + Check the makefile is created when the container platform is used + """ + container_plat = CONTAINER_PLATFORM[0] + yamlfile_path = f"{test_dir}/{NM_EXAMPLE}/{YAMLFILE}" + 
createMakefile.makefile_create(yamlfile_path,CONTAINER_PLATFORM,TARGET) + + assert Path(f"tmp/{container_plat}/Makefile").exists() diff --git a/fre/pp/configure_script_yaml.py b/fre/pp/configure_script_yaml.py index 443d6e00..b782e3de 100644 --- a/fre/pp/configure_script_yaml.py +++ b/fre/pp/configure_script_yaml.py @@ -147,7 +147,7 @@ def set_rose_apps(yamlfile,rose_regrid,rose_remap): value=f'{interp_split[0]}_{interp_split[1]}.{interp_method}') #################### -def _yamlInfo(yamlfile,experiment,platform,target): +def yamlInfo(yamlfile,experiment,platform,target): """ Using a valid pp.yaml, the rose-app and rose-suite configuration files are created in the cylc-src @@ -200,12 +200,12 @@ def _yamlInfo(yamlfile,experiment,platform,target): print(" " + outfile) @click.command() -def yamlInfo(yamlfile,experiment,platform,target): +def _yamlInfo(yamlfile,experiment,platform,target): ''' Wrapper script for calling yamlInfo - allows the decorated version of the function to be separate from the undecorated version ''' - return _yamlInfo(yamlfile,experiment,platform,target) + return yamlInfo(yamlfile,experiment,platform,target) # Use parseyaml function to parse created edits.yaml if __name__ == '__main__': diff --git a/fre/pp/tests/test_configure_script_yaml.py b/fre/pp/tests/test_configure_script_yaml.py index 1f61efd4..eaf1fc2e 100644 --- a/fre/pp/tests/test_configure_script_yaml.py +++ b/fre/pp/tests/test_configure_script_yaml.py @@ -1,25 +1,24 @@ +""" +Test configure_script_yaml +""" import os from pathlib import Path from fre.pp import configure_script_yaml as csy # Set what would be click options -experiment = "c96L65_am5f7b12r1_amip" -platform = "gfdl.ncrc5-intel22-classic" -target = "prod-openmp" +EXPERIMENT = "c96L65_am5f7b12r1_amip" +PLATFORM = "gfdl.ncrc5-intel22-classic" +TARGET = "prod-openmp" # Set example yaml paths, input directory -CWD = Path.cwd() test_dir = Path("fre/pp/tests") -test_yaml = Path(f"AM5_example/am5.yaml") - -# Set home for ~/cylc-src location in script -os.environ["HOME"]=str(Path(f"{CWD}/{test_dir}/configure_yaml_out")) +test_yaml = Path("AM5_example/am5.yaml") def test_combinedyaml_exists(): """ Make sure combined yaml file exists """ - assert Path(f"{CWD}/{test_dir}/{test_yaml}").exists() + assert Path(f"{test_dir}/{test_yaml}").exists() def test_configure_script(): """ @@ -27,23 +26,21 @@ def test_configure_script(): Creates rose-suite, regrid rose-app, remap rose-app TO-DO: will break this up for better tests """ - os.chdir(f"{CWD}/{test_dir}/AM5_example") + # Set home for ~/cylc-src location in script + os.environ["HOME"]=str(Path(f"{test_dir}/configure_yaml_out")) # Set output directory - out_dir = Path(f"{os.getenv('HOME')}/cylc-src/{experiment}__{platform}__{target}") + out_dir = Path(f"{os.getenv('HOME')}/cylc-src/{EXPERIMENT}__{PLATFORM}__{TARGET}") Path(out_dir).mkdir(parents=True,exist_ok=True) # Define combined yaml - model_yaml = str(Path(f"{CWD}/{test_dir}/{test_yaml}")) + model_yaml = str(Path(f"{test_dir}/{test_yaml}")) # Invoke configure_yaml_script.py - csy._yamlInfo(model_yaml,experiment,platform,target) + csy.yamlInfo(model_yaml,EXPERIMENT,PLATFORM,TARGET) # Check for configuration creation and final combined yaml - assert all([Path(f"{out_dir}/{experiment}.yaml").exists(), + assert all([Path(f"{out_dir}/{EXPERIMENT}.yaml").exists(), Path(f"{out_dir}/rose-suite.conf").exists(), Path(f"{out_dir}/app/regrid-xy/rose-app.conf").exists(), Path(f"{out_dir}/app/remap-pp-components/rose-app.conf").exists()]) - - # Go back to original directory 
- os.chdir(CWD) diff --git a/fre/tests/test_files/CMORbite_var_list.json b/fre/tests/test_files/CMORbite_var_list.json new file mode 100644 index 00000000..5b75e54f --- /dev/null +++ b/fre/tests/test_files/CMORbite_var_list.json @@ -0,0 +1,11 @@ +{ + "lai": "lai", + "t_ref": "t_ref", + "cl": "cl", + "mc": "mc", + "ta": "ta", + "sos": "sos", + "so": "so", + "ch4global": "ch4global", + "gppLut": "gppLut" +} diff --git a/fre/tests/test_fre_app_cli.py b/fre/tests/test_fre_app_cli.py index 4b61ff75..ca14df51 100644 --- a/fre/tests/test_fre_app_cli.py +++ b/fre/tests/test_fre_app_cli.py @@ -1,6 +1,7 @@ """ test "fre app" calls """ import os +import subprocess from pathlib import Path import click diff --git a/fre/yamltools/combine_yamls.py b/fre/yamltools/combine_yamls.py index 4584c115..b2b6540f 100755 --- a/fre/yamltools/combine_yamls.py +++ b/fre/yamltools/combine_yamls.py @@ -90,7 +90,7 @@ def experiment_check(mainyaml_dir,comb,experiment): if expyaml is not None: ey_path=[] for e in expyaml: - if Path(e).exists(): + if Path(os.path.join(mainyaml_dir,e)).exists(): ey=Path(os.path.join(mainyaml_dir,e)) ey_path.append(ey) else: @@ -115,7 +115,7 @@ def experiment_check(mainyaml_dir,comb,experiment): class init_compile_yaml(): def __init__(self,yamlfile,platform,target): """ - Process to combine yamls appllicable to compilation + Process to combine yamls applicable to compilation """ self.yml = yamlfile self.name = yamlfile.split(".")[0] diff --git a/fre/yamltools/tests/test_combine_yamls.py b/fre/yamltools/tests/test_combine_yamls.py index f9e95fa2..7df6eb36 100644 --- a/fre/yamltools/tests/test_combine_yamls.py +++ b/fre/yamltools/tests/test_combine_yamls.py @@ -13,13 +13,14 @@ ## SET-UP # Set example yaml paths, input directory, output directory -CWD = Path.cwd() +#CWD = Path.cwd() TEST_DIR = Path("fre/yamltools/tests") -IN_DIR = Path(f"{CWD}/{TEST_DIR}/AM5_example") +IN_DIR = Path(f"{TEST_DIR}/AM5_example") +SCHEMA_DIR = Path("fre/gfdl_msd_schemas/FRE") # Create output directories -COMP_OUT_DIR = Path(f"{CWD}/{TEST_DIR}/combine_yamls_out/compile") -PP_OUT_DIR = Path(f"{CWD}/{TEST_DIR}/combine_yamls_out/pp") +COMP_OUT_DIR = Path(f"{TEST_DIR}/combine_yamls_out/compile") +PP_OUT_DIR = Path(f"{TEST_DIR}/combine_yamls_out/pp") # If output directory exists, remove and create again for out in [COMP_OUT_DIR, PP_OUT_DIR]: @@ -63,32 +64,26 @@ def test_merged_compile_yamls(): Check for the creation of the combined-[experiment] yaml Check that the model yaml was merged into the combined yaml """ - # Go into the input directory - os.chdir(IN_DIR) - # Model yaml path - modelyaml = "am5.yaml" + modelyaml = str(Path(f"{IN_DIR}/am5.yaml")) use = "compile" # Merge the yamls cy.consolidate_yamls(modelyaml, COMP_EXPERIMENT, COMP_PLATFORM, COMP_TARGET, use) # Move combined yaml to output location - shutil.move("combined-am5.yaml", COMP_OUT_DIR) + shutil.move(f"{IN_DIR}/combined-am5.yaml", COMP_OUT_DIR) # Check that the combined yaml exists assert Path(f"{COMP_OUT_DIR}/combined-{COMP_EXPERIMENT}.yaml").exists() - # Go back to original directory - os.chdir(CWD) - def test_combined_compileyaml_validation(): """ Validate the combined compile yaml """ combined_yamlfile =f"{COMP_OUT_DIR}/combined-{COMP_EXPERIMENT}.yaml" - schema_file = os.path.join(f"{IN_DIR}","compile_yamls","schema.json") - + schema_file = os.path.join(SCHEMA_DIR, "fre_make.json") + with open(combined_yamlfile,'r') as cf: yml = yaml.safe_load(cf) @@ -108,48 +103,39 @@ def test_combined_compileyaml_combinefail(): Check to test if compile yaml is 
incorrect/does not exist, the combine fails. (compile yaml path misspelled) """ - # Go into the input directory - os.chdir(f"{IN_DIR}/compile_yamls/compile_fail") - # Model yaml path - modelyaml = "am5-wrong_compilefile.yaml" + modelyaml = str(Path(f"{IN_DIR}/compile_yamls/compile_fail/am5-wrong_compilefile.yaml")) use = "compile" # Merge the yamls - should fail since there is no compile yaml specified in the model yaml try: cy.consolidate_yamls(modelyaml, COMP_EXPERIMENT, COMP_PLATFORM, COMP_TARGET, use) # Move combined yaml to output location - shutil.move("combined-am5-wrong_compilefile.yaml", COMP_OUT_DIR) + shutil.move(f"{IN_DIR}/compile_yamls/compile_fail/combined-am5-wrong_compilefile.yaml", COMP_OUT_DIR) except: print("EXPECTED FAILURE") # Move combined yaml to output location - shutil.move("combined-am5-wrong_compilefile.yaml", COMP_OUT_DIR) + shutil.move(f"{IN_DIR}/compile_yamls/compile_fail/combined-am5-wrong_compilefile.yaml", COMP_OUT_DIR) assert True - # Go back to original directory - os.chdir(CWD) - def test_combined_compileyaml_validatefail(): """ Check if the schema is validating correctly Branch should be string """ - # Go into the input directory - os.chdir(f"{IN_DIR}/compile_yamls/compile_fail") - # Model yaml path - modelyaml = "am5-wrong_datatype.yaml" + modelyaml = str(Path(f"{IN_DIR}/compile_yamls/compile_fail/am5-wrong_datatype.yaml")) use = "compile" # Merge the yamls cy.consolidate_yamls(modelyaml, COMP_EXPERIMENT, COMP_PLATFORM, COMP_TARGET, use) # Move combined yaml to output location - shutil.move("combined-am5-wrong_datatype.yaml", COMP_OUT_DIR) + shutil.move(f"{IN_DIR}/compile_yamls/compile_fail/combined-am5-wrong_datatype.yaml", COMP_OUT_DIR) # Validate against schema; should fail wrong_combined = Path(f"{COMP_OUT_DIR}/combined-am5-wrong_datatype.yaml") - schema_file = os.path.join(f"{IN_DIR}","compile_yamls","schema.json") + schema_file = os.path.join(SCHEMA_DIR, "fre_make.json") # Open/load combined yaml file with open(wrong_combined,'r') as cf: @@ -166,9 +152,6 @@ def test_combined_compileyaml_validatefail(): except: assert True - # Go back to original directory - os.chdir(CWD) - ############ PP ############ def test_expyaml_exists(): """ @@ -188,9 +171,6 @@ def test_merged_pp_yamls(): Check for the creation of the combined-[experiment] yaml Check that the model yaml was merged into the combined yaml """ - # Go into the input directory - os.chdir(IN_DIR) - # Model yaml path modelyaml = Path(f"{IN_DIR}/am5.yaml") use = "pp" @@ -204,9 +184,6 @@ def test_merged_pp_yamls(): # Check that the combined yaml exists assert Path(f"{PP_OUT_DIR}/combined-{PP_EXPERIMENT}.yaml").exists() - # Go back to original directory - os.chdir(CWD) - def test_combined_ppyaml_validation(): """ Validate the combined compile yaml diff --git a/meta.yaml b/meta.yaml index 07f76686..a11f0151 100644 --- a/meta.yaml +++ b/meta.yaml @@ -5,10 +5,10 @@ package: version: '{{ environ.get("GIT_DESCRIBE_TAG", data.get("version")) }}' source: - path: . +# path: . # ideally we want this git_url path, but it messes with conda publish # where it builds only the sourced url and not local/branch changes -# git_url: https://github.com/NOAA-GFDL/fre-cli + git_url: https://github.com/NOAA-GFDL/fre-cli.git build: script: diff --git a/run_test_file_cases.py b/run_test_file_cases.py new file mode 100644 index 00000000..a588f803 --- /dev/null +++ b/run_test_file_cases.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +''' +this is a quick and dirty script. +it will not be maintained. it will not be supported. 
+it is for a very context-dependent set of tests for a very specific point in time. +''' + + +import sys +import os +from pathlib import Path + +import fre +from fre.cmor.cmor_mixer import cmor_run_subtool as run_cmor + +def print_the_outcome(some_return,case_str): + print('-----------------------------------------------------------------------------------------------------------------') + if some_return != 0: + print(f'{case_str} case failed[[[FAIL -_-]]]: some_return={some_return}') + else: + print(f'{case_str} case probably OK [[[PROB-OK ^-^]]]: some_return={some_return}') + print('-----------------------------------------------------------------------------------------------------------------') + print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n') + assert some_return == 0 + +# global consts for these tests, with no/trivial impact on the results +ROOTDIR='fre/tests/test_files' +CMORBITE_VARLIST=f'{ROOTDIR}/CMORbite_var_list.json' + +# this file exists basically for users to specify their own information to append to the netcdf file +# i.e., it fills in FOO/BAR/BAZ style values, and what they are currently is totally irrelevant +EXP_CONFIG_DEFAULT=f'{ROOTDIR}/CMOR_input_example.json' # this likely is not sufficient + + +def run_cmor_RUN(filename, table, opt_var_name): + func_debug1 = False + if func_debug1: + print('run_cmor(\n' + f' indir = \"{str(Path(filename).parent)}\",\n' + f' json_var_list = \"{CMORBITE_VARLIST}\",\n' + f' json_table_config = \"{ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json\",\n' + f' json_exp_config = \"{EXP_CONFIG_DEFAULT}\",\n' + f' outdir = \"{os.getcwd()}\",\n' + f' opt_var_name = \"{opt_var_name}\"\n' + ')\n' + ) + func_debug2 = True + if func_debug2: + print('fre cmor run ' + f'-d {str(Path(filename).parent)} ' + f'-l {CMORBITE_VARLIST} ' + f'-r {ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json ' + f'-p {EXP_CONFIG_DEFAULT} ' + f'-o {os.getcwd()} ' + f'-v {opt_var_name} ' + ) + FOO_return = run_cmor( + indir = str(Path(filename).parent), + json_var_list = CMORBITE_VARLIST, + json_table_config = f'{ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json', + json_exp_config = EXP_CONFIG_DEFAULT, + outdir = os.getcwd(), # dont keep it this way... 
+ opt_var_name = opt_var_name + ) + return FOO_return + + +## 1) SUCCEEDs +## land, Lmon, gr1 +#testfile_land_gr1_Lmon = \ +# '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' +#try: +# some_return = run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') +#except: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'land_gr1_Lmon / lai') + + +## 2) SUCCEEDs +## atmos, Amon / cl +#testfile_atmos_level_cmip_gr1_Amon_complex_vert = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.196001-196412.cl.nc' +#try: +# some_return = run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_complex_vert, 'Amon', opt_var_name = 'cl') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_complex_vert / cl') + + +## 3) SUCCEEDs +## atmos, Amon / mc +#testfile_atmos_level_cmip_gr1_Amon_fullL = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.195501-195912.mc.nc' +#try: +# some_return = run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_fullL, 'Amon', opt_var_name = 'mc') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_fullL / mc') + + +# 4) FAIL (no longitude coordinate case) +# atmos, Amoon / ta +# just like #1, but lack longitude +# Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / +testfile_atmos_gr1_AmonZ_nolons = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' +try: + some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') +except Exception as exc: + print(f'exception caught: exc=\n{exc}') + some_return=-1 + pass +print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') + + +## 5) SUCCEEDS +## ocean, Omon / sos +#testfile_ocean_monthly_1x1deg_gr = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/ocean_monthly_1x1deg/ts/monthly/5yr/ocean_monthly_1x1deg.190001-190412.sos.nc' +#try: +# some_return = run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'ocean_monthly_1x1deg_gr / sos') + + + +## 6) FAIL (copy_nc failure!!! WEIRD) +## ocean, Omon / sos +## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists. +#testfile_ocean_monthly_gn = \ +# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ts/monthly/5yr/ocean_monthly.002101-002512.sos.nc' +#try: +# some_return = run_cmor_RUN(testfile_ocean_monthly_gn, 'Omon', opt_var_name = 'sos') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'ocean_monthly_gn / sos') + + + +## 7) FAIL (copy_nc failure!!! 
WEIRD) +## ocean, Omon / so +## Result - identical failure to #6 +#testfile_ocean_monthly_z_1x1deg_gr = \ +# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/ocean_monthly_z_1x1deg.000101-000512.so.nc' +#try: +# some_return = run_cmor_RUN(testfile_ocean_monthly_z_1x1deg_gr, 'Omon', opt_var_name = 'so') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'ocean_monthly_z_1x1deg_gr / so') + + +# 8) FAIL (no latitude nor longitude coordinates cases) +# atmos, Amon / ch4global +# Result - error, File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lat not found in / +testfile_atmos_scalar_gn_Amon_nolon_nolat = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' +try: + some_return = run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') +except Exception as exc: + print(f'exception caught: exc=\n{exc}') + some_return=-1 + pass +print_the_outcome(some_return,'atmos_scalar_gn_Amon_nolon_nolat / ch4global') + + +## 9) FAIL (4 dimensional data with no vertical) +## Result - error, +## File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", +## line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis == "Z"): +## AttributeError: NetCDF: Attribute not found +#testfile_LUmip_refined_gr1_Emon_landusedim = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' +#try: +# some_return = run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 'gppLut') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'LUmip_refined_gr1_Emon_langusedim / gppLut') + + + +
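For reference, the local_var/target_var file-targeting convention the changes above implement, as a minimal sketch in plain python (the name mapping, component label, and dates below are hypothetical, not taken from this patch):

    # a varlist json maps 'local' names (embedded in filenames) to the name of the variable
    # actually stored inside the file; the two often coincide (see CMORbite_var_list.json), but need not
    var_list = { "t_ref": "tas" }    # hypothetical local-name -> in-file-name pair
    local_var = "t_ref"
    target_var = var_list[local_var] # what rewrite_netcdf_file_var reads from the file, via ds[target_var]
    # cmor_run_subtool now globs for <indir>/*.<local_var>.nc, and each discovered file is expected
    # to look like <indir>/<name_of_set>.<iso_datetime>.<local_var>.nc, e.g.
    filename = f"atmos.000101-000512.{local_var}.nc"
    # passing opt_var_name='t_ref' (i.e. fre cmor run ... -v t_ref) skips every local_var except that one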