-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* Add standardise_mom6_filenames.sh script (#32) * mom6 filenames test --------- Co-authored-by: Anton Steketee <[email protected]>
- Loading branch information
1 parent
1a1cd0a
commit b4048fd
Showing
3 changed files
with
213 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/usr/bin/bash | ||
# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details. | ||
# SPDX-License-Identifier: Apache-2.0. | ||
# | ||
# Standardise file naming for MOM6 output files in access-om3 by removing the underscore before the four-digit year, i.e., replacing '_YYYY' with 'YYYY' | ||
# This was written assuming it would be used as a payu "userscript" at the "archive" stage, but alternatively a path to an "archive" directory can be provided. | ||
# For more details, see https://github.com/COSIMA/om3-scripts/issues/32 | ||
|
||
# Print the usage message for this script to standard output.
# The script name is taken from "$0" so the text stays correct if the file is renamed.
Help()
{
    # Display help
    echo -e "Standardise file naming for MOM6 output files.\n"
    echo "Syntax: $(basename "$0") [-h] [-d DIRECTORY]"
    echo "options:"
    echo "h Print this help message."
    echo -e "d Process files in the specified 'DIRECTORY'."
}
|
||
# Parse command-line options.
#   -h            print the help text and exit successfully
#   -d DIRECTORY  process only DIRECTORY (must be an existing directory)
# The leading ':' in the optstring selects getopts' silent mode: a missing
# option argument is reported as ':' and an unknown option as '?', with the
# offending option letter in OPTARG. Errors go to stderr with a non-zero status.
while getopts ":hd:" option; do
    case $option in
        h) # display help
            Help
            exit 0;;
        d) # Enter a directory
            out_dir=$OPTARG
            # quoted so that a path containing whitespace is tested as one word
            if [ ! -d "$out_dir" ]; then
                echo "$out_dir Does not exist" >&2
                exit 1
            fi;;
        :) # option supplied without its required argument (e.g. a bare '-d')
            echo "Error: Option -$OPTARG requires an argument" >&2
            exit 1;;
        \?) # Invalid option
            echo "Error: Invalid option" >&2
            exit 1;;
    esac
done
|
||
# If no directory was specified, collect every 'archive/output*' directory
# (relative to the current working directory) whose name ends in a digit.
# An array plus quoting keeps paths that contain whitespace intact; the order
# of processing does not matter because every file is renamed independently.
out_dirs=()
if [ -z "$out_dir" ]; then
    for candidate in archive/output*[0-9]; do
        # an unmatched glob is left as literal text, which fails this test
        if [ -d "$candidate" ]; then
            out_dirs+=("$candidate")
        fi
    done
else
    out_dirs=("$out_dir")
fi

# process each output directory
for dir in "${out_dirs[@]}"; do
    # process each mom6 file
    for current_file in "$dir"/access-om3.mom6.*.nc; do
        if [ -f "$current_file" ]; then
            # Rewrite only the file name, so a '_YYYY' sequence that happens to
            # occur in a parent directory name is never modified.
            file_dir=${current_file%/*}
            file_name=${current_file##*/}
            new_name=$(printf '%s\n' "$file_name" | sed -E 's/_([0-9]{4})/\1/')
            # nothing to do when the name carries no '_YYYY'
            if [ "$new_name" != "$file_name" ]; then
                # rename the file without overwriting existing files
                mv -n -- "$current_file" "$file_dir/$new_name"
            fi
        fi
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
import pytest | ||
import pandas as pd | ||
|
||
from os import makedirs, chdir | ||
from subprocess import run | ||
from pathlib import Path | ||
|
||
# Directory two levels above the one containing this test file
# (presumably the repository root - confirm against the repo layout).
scripts_base = Path(__file__).parents[2]

# Location of the shell script exercised by these tests.
run_str = (
    str(scripts_base)
    + "/payu_config/archive_scripts/standardise_mom6_filenames.sh"
)

# File-name stem shared by every dummy MOM6 diagnostic file the tests create.
DIAG_BASE = "access-om3.mom6.h.test"
|
||
|
||
def assert_file_exists(p):
    """Raise ``AssertionError`` unless ``p`` resolves to an existing regular file."""
    resolved = Path(p).resolve()
    if resolved.is_file():
        return
    raise AssertionError("File does not exist: %s" % str(p))
|
||
|
||
def assert_f_not_exists(p):
    """Raise ``AssertionError`` if ``p`` resolves to an existing regular file."""
    resolved = Path(p).resolve()
    if not resolved.is_file():
        return
    raise AssertionError("File exists and should not: %s" % str(p))
|
||
|
||
def yearly_files(dir_name, n, tmp_path):
    """
    Create ``n`` empty yearly data files and return their paths.

    The files are written to ``<tmp_path>/<dir_name>/`` (the directory is
    created here) and are named ``<DIAG_BASE>._YYYY.nc``, one per consecutive
    year starting at 2010. Each file is checked to exist before returning.
    """
    # Year-end timestamps; only the leading four characters (the year) are used.
    year_ends = pd.date_range("2010-01-01", freq="YE", periods=n)

    out_dir = str(tmp_path) + "/" + dir_name + "/"
    paths = []
    for stamp in year_ends:
        year = str(stamp)[0:4]
        paths.append(f"{out_dir}{DIAG_BASE}._{year}.nc")

    makedirs(out_dir)

    # Opening in write mode is enough to create each empty file.
    for file_path in paths:
        open(file_path, "w").close()

    for file_path in paths:
        assert_file_exists(file_path)

    return paths
|
||
|
||
@pytest.mark.parametrize(
    "hist_dir, use_dir, n",
    [
        ("archive/output000", False, 12),
        ("archive/output999", False, 1),
        ("archive/output9999", False, 1),
        ("archive/output574", True, 12),
    ],
)  # run this test with several folder names and lengths, provide the directory as an argument sometimes
def test_true_case(hist_dir, use_dir, n, tmp_path):
    """
    Create ``n`` files named ``..._YYYY.nc``, run the script, and check that
    every file was renamed to ``...YYYY.nc``.

    When ``use_dir`` is true the output directory is passed with ``-d``;
    otherwise the script is run with no arguments from ``tmp_path`` so that it
    discovers ``archive/output*`` on its own.
    """
    yearly_paths = yearly_files(hist_dir, n, tmp_path)
    chdir(tmp_path)
    output_dir = Path(yearly_paths[0]).parents[0]

    if not use_dir:  # default path
        run([run_str])
    else:  # provide path
        run([run_str, "-d", output_dir])

    # One more year than was created: the final entry is a file name that
    # must never appear.
    expected_years = pd.date_range("2010-01-01", freq="YE", periods=n + 1)

    # valid output filenames
    expected_paths = [
        f"{output_dir}/{DIAG_BASE}.{str(t)[0:4]}.nc" for t in expected_years
    ]

    for p in expected_paths[0:n]:
        assert_file_exists(p)

    # Slice rather than index: indexing yields a single string, and looping
    # over it would check each character of the path instead of the path.
    for p in expected_paths[n:]:
        assert_f_not_exists(p)

    # the original underscore-prefixed names must be gone
    for p in yearly_paths:
        assert_f_not_exists(p)
|
||
|
||
@pytest.mark.parametrize(
    "hist_dir, use_dir, n",
    [
        ("archive/output000", False, 12),
    ],
)
def test_dont_override(hist_dir, use_dir, n, tmp_path):
    """
    Create the ``_YYYY`` source files and also the files they would be renamed
    to, run the script, and confirm that none of them has been removed.
    """
    source_paths = yearly_files(hist_dir, n, tmp_path)
    chdir(tmp_path)
    output_dir = Path(source_paths[0]).parents[0]

    # Pre-create every rename target so that each rename would collide.
    target_paths = []
    for stamp in pd.date_range("2010-01-01", freq="YE", periods=n):
        target_paths.append(f"{output_dir}/{DIAG_BASE}.{str(stamp)[0:4]}.nc")

    for target in target_paths:
        open(target, "w").close()

    # Either pass the directory explicitly or rely on the default discovery.
    command = [run_str, "-d", output_dir] if use_dir else [run_str]
    run(command)

    # Targets first, then sources: both sets must still be present.
    for p in [*target_paths, *source_paths]:
        assert_file_exists(p)
|
||
|
||
# @pytest.mark.parametrize("hist_dir, ndays", [("Default", 31), ("Default", 27)]) | ||
# def test_no_override(hist_dir, ndays, hist_base, tmp_path): | ||
# """ | ||
# Run the script to convert the daily data into monthly files, but the output filename already exists, and check nothing happens. | ||
# """ | ||
|
||
# daily_paths = daily_files(hist_dir, hist_base, ndays, tmp_path) | ||
|
||
# chdir(tmp_path) | ||
# output_dir = Path(daily_paths[0]).parents[0] | ||
|
||
# expected_months = pd.date_range("2010-01-01", freq="ME", periods=1) | ||
|
||
# monthly_paths = [ | ||
# f"{output_dir}/{hist_base}.{str(t)[0:7]}.nc" for t in expected_months | ||
# ] | ||
# for p in monthly_paths: | ||
# Path(p).touch() | ||
|
||
# run([run_str]) | ||
|
||
# for p in daily_paths: | ||
# assert_file_exists(p) | ||
|
||
# for p in monthly_paths: | ||
# assert_file_exists(p) |