diff --git a/payu_config/archive.sh b/payu_config/archive.sh
index b5d3ff4..f098f15 100644
--- a/payu_config/archive.sh
+++ b/payu_config/archive.sh
@@ -2,4 +2,5 @@
 
 source $(dirname "$0")/archive_scripts/archive_cice_restarts.sh
 source $(dirname "$0")/archive_scripts/concat_ice_daily.sh
+source $(dirname "$0")/archive_scripts/standardise_mom6_filenames.sh
 python3 $(dirname "$0")/archive_scripts/build_intake_ds.py
diff --git a/payu_config/archive_scripts/standardise_mom6_filenames.sh b/payu_config/archive_scripts/standardise_mom6_filenames.sh
new file mode 100755
index 0000000..7253ff2
--- /dev/null
+++ b/payu_config/archive_scripts/standardise_mom6_filenames.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/bash
+# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0.
+#
+# Standardise file naming for MOM6 output files in access-om3 by removing the underscore before the four-digit year, i.e., replacing '_YYYY' with 'YYYY'
+# This was written assuming it would be used as a payu "userscript" at the "archive" stage, but alternatively a path to an "archive" directory can be provided.
+# For more details, see https://github.com/COSIMA/om3-scripts/issues/32
+#
+# NOTE: payu_config/archive.sh runs this file with 'source', so it shares the calling shell:
+# shell options are deliberately left untouched, and every 'exit' below also ends the caller.
+
+Help()
+{
+    # Display help
+    echo -e "Standardise file naming for MOM6 output files.\n"
+    echo "Syntax: standardise_mom6_filenames.sh [-h|d DIRECTORY]"
+    echo "options:"
+    echo "h Print this help message."
+    echo -e "d Process files in the specified 'DIRECTORY'."
+}
+
+# start from a clean state: when sourced, getopts state and 'out_dir' can be left over from the calling shell
+OPTIND=1
+out_dir=""
+
+while getopts ":hd:" option; do
+    case $option in
+        h) # display help
+            Help
+            exit 0;;
+        d) # Enter a directory
+            out_dir=$OPTARG
+            if [ ! -d "$out_dir" ]; then
+                echo "$out_dir Does not exist" >&2
+                exit 1
+            fi;;
+        :) # option given without its argument (silent getopts mode reports this as ':')
+            echo "Error: Option -$OPTARG requires an argument" >&2
+            exit 1;;
+        \?) # Invalid option
+            echo "Error: Invalid option" >&2
+            exit 1;;
+    esac
+done
+
+# if no directory was specified, collect all directories from 'archive'
+if [ -z "$out_dir" ]; then
+    out_dirs=(archive/output*[0-9])
+else
+    out_dirs=("$out_dir")
+fi
+
+# process each output directory
+for dir in "${out_dirs[@]}"; do
+    # an unmatched glob is kept as the literal pattern, so skip anything that is not a directory
+    [ -d "$dir" ] || continue
+    # process each mom6 file
+    for current_file in "$dir"/access-om3.mom6.*.nc; do
+        [ -f "$current_file" ] || continue
+        # only rewrite the file name itself, so a '_YYYY' in a parent directory name is left alone
+        current_name=${current_file##*/}
+        new_name=$(printf '%s\n' "$current_name" | sed -E 's/_([0-9]{4})/\1/')
+        new_filename="$dir/$new_name"
+        # rename the file without overwriting existing files; names that are already standardised are skipped
+        if [ "$new_name" != "$current_name" ] && [ ! -e "$new_filename" ]; then
+            mv -n -- "$current_file" "$new_filename"
+        fi
+    done
+done
diff --git a/test/test_payu_conf/test_mom6_filenames.py b/test/test_payu_conf/test_mom6_filenames.py
new file mode 100644
index 0000000..1d45c8a
--- /dev/null
+++ b/test/test_payu_conf/test_mom6_filenames.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from os import makedirs, chdir
+from subprocess import run
+from pathlib import Path
+
+scripts_base = Path(__file__).parents[2]
+run_str = f"{scripts_base}/payu_config/archive_scripts/standardise_mom6_filenames.sh"
+
+DIAG_BASE = "access-om3.mom6.h.test"
+
+
+def assert_file_exists(p):
+    if not Path(p).resolve().is_file():
+        raise AssertionError("File does not exist: %s" % str(p))
+
+
+def assert_f_not_exists(p):
+    if Path(p).resolve().is_file():
+        raise AssertionError("File exists and should not: %s" % str(p))
+
+
+def yearly_files(dir_name, n, tmp_path):
+    """
+    Make empty data files
+    """
+
+    times = pd.date_range("2010-01-01", freq="YE", periods=n)
+
+    out_dir = str(tmp_path) + "/" + dir_name + "/"
+    paths = [f"{out_dir}{DIAG_BASE}._{str(t)[0:4]}.nc" for t in times]
+
+    makedirs(out_dir)
+
+    for p in paths:
+        Path(p).touch()
+
+    for p in paths:
+        assert_file_exists(p)
+
+    return paths
+
+
+@pytest.mark.parametrize(
+    "hist_dir, use_dir, n",
+    [
+        ("archive/output000", False, 12),
+        ("archive/output999", False, 1),
+        ("archive/output9999", False, 1),
+        ("archive/output574", True, 12),
+    ],
+)  # run this test with several folder names and lengths, provide the directory as an argument sometimes
+def test_true_case(hist_dir, use_dir, n, tmp_path):
+
+    yearly_paths = yearly_files(hist_dir, n, tmp_path)
+    chdir(tmp_path)
+    output_dir = Path(yearly_paths[0]).parents[0]
+
+    if not use_dir:  # default path
+        run([run_str])
+    else:  # provide path
+        run(
+            [
+                run_str,
+                "-d",
+                output_dir,
+            ],
+        )
+
+    expected_years = pd.date_range("2010-01-01", freq="YE", periods=n + 1)
+
+    # valid output filenames
+    expected_paths = [
+        f"{output_dir}/{DIAG_BASE}.{str(t)[0:4]}.nc" for t in expected_years
+    ]
+
+    for p in expected_paths[0:n]:
+        assert_file_exists(p)
+
+    # the year after the last input must not appear (index the list: looping over the string would test single characters)
+    assert_f_not_exists(expected_paths[n])
+
+    for p in yearly_paths:
+        assert_f_not_exists(p)
+
+
+@pytest.mark.parametrize(
+    "hist_dir, use_dir, n",
+    [
+        ("archive/output000", False, 12),
+    ],
+)
+def test_dont_override(hist_dir, use_dir, n, tmp_path):
+    """
+    make some empty data files, and make some files where the files should be renamed to,
+    and confirm it doesn't delete any of them
+    """
+
+    yearly_paths = yearly_files(hist_dir, n, tmp_path)
+    chdir(tmp_path)
+    output_dir = Path(yearly_paths[0]).parents[0]
+
+    # write the expected output too
+    expected_years = pd.date_range("2010-01-01", freq="YE", periods=n)
+
+    expected_paths = [
+        f"{output_dir}/{DIAG_BASE}.{str(t)[0:4]}.nc" for t in expected_years
+    ]
+
+    for p in expected_paths:
+        Path(p).touch()
+
+    if not use_dir:  # default path
+        run([run_str])
+    else:  # provide path
+        run(
+            [
+                run_str,
+                "-d",
+                output_dir,
+            ],
+        )
+
+    for p in expected_paths:
+        assert_file_exists(p)
+
+    for p in yearly_paths:
+        assert_file_exists(p)
+
+
+# @pytest.mark.parametrize("hist_dir, ndays", [("Default", 31), ("Default", 27)])
+# def test_no_override(hist_dir, ndays, hist_base, tmp_path):
+#     """
+#     Run the script to convert the daily data into monthly files, but the output filename already exists, and check nothing happens.
+#     """
+
+#     daily_paths = daily_files(hist_dir, hist_base, ndays, tmp_path)
+
+#     chdir(tmp_path)
+#     output_dir = Path(daily_paths[0]).parents[0]
+
+#     expected_months = pd.date_range("2010-01-01", freq="ME", periods=1)
+
+#     monthly_paths = [
+#         f"{output_dir}/{hist_base}.{str(t)[0:7]}.nc" for t in expected_months
+#     ]
+#     for p in monthly_paths:
+#         Path(p).touch()
+
+#     run([run_str])
+
+#     for p in daily_paths:
+#         assert_file_exists(p)
+
+#     for p in monthly_paths:
+#         assert_file_exists(p)