From 5c62a3f7b61fc0cff77af51e82ab616f4717f03c Mon Sep 17 00:00:00 2001
From: Micael Oliveira <micael.oliveira@anu.edu.au>
Date: Wed, 13 Dec 2023 15:58:08 +1100
Subject: [PATCH] Added functions to read and write a MOM6 input file.

---
 om3utils/mom6_input.py   | 317 +++++++++++++++++++++++++++++++++++++++
 pyproject.toml           |   1 +
 tests/test_mom6_input.py | 101 +++++++++++++
 3 files changed, 419 insertions(+)
 create mode 100644 om3utils/mom6_input.py
 create mode 100644 tests/test_mom6_input.py

diff --git a/om3utils/mom6_input.py b/om3utils/mom6_input.py
new file mode 100644
index 0000000..018067d
--- /dev/null
+++ b/om3utils/mom6_input.py
@@ -0,0 +1,317 @@
+"""MOM6 input
+
+The MOM6 parameter file format is described here:
+
+https://mom6.readthedocs.io/en/main/api/generated/pages/Runtime_Parameter_System.html#mom6-parameter-file-syntax
+
+It has similarities with a Fortran namelist, but with some notable differences:
+ - no opening nor closing clauses ('&NAME' and '/')
+ - usage of an override directive ('#override')
+ - some characters, like '*', are allowed in the MOM6 parameter files, but not in namelists
+We have also found MOM6 parameter files with C-style comments in files used by CESM. These are ignored by MOM6, but
+are actually not part of the specifications.
+
+However, it is possible to preprocess the file to make it a conforming Fortran namelist and then use the f90nml
+package to read it. Similarly, one can use the f90nml package to write the file and then postprocess it.
+
+This means that the path from a MOM6 parameter file to a Python dictionary requires the following steps:
+ 1. read file and preprocess it to handle the directives and the C-style comments.
+ 2. add opening and closing namelist clauses
+ 3. parse the file with f90nml, which returns a Namelist object
+ 4. convert the Namelist object to a Python dictionary
+
+Similarly, to write a Python dictionary as a MOM6 parameter file, one requires the following steps:
+ 1. convert the Python dictionary into a Namelist object
+ 2. write the Namelist object to a file
+ 3. remove opening and closing namelist clauses
+
+ In the following, we use the following naming conventions:
+  - 'mom6_input': the contents of the parameter file as a Python dictionary
+  - 'mom6_input_str': the contents of the parameter file, stored as a string
+  - 'nml_str': the contents of the file, patched to make it a conforming namelist, stored as a string
+
+We then have utility functions to convert from one representation to another:
+  - nml_str -> mom6_input (_nml_str_to_mom6_input)
+  - mom6_input -> nml_str (_mom6_input_to_nml_str)
+  - mom6_input_str -> nml_str (_mom6_input_str_to_nml_str + _patch_mom6_input_str)
+  - nml_str -> mom6_input_str (_nml_str_to_mom6_input_str + _unpatch_mom6_input_str)
+
+For round-trip parsing, one needs to keep track of the changes done to the file to make it a conforming Fortran
+namelist and then undo those changes. Since we use the f90nml parser's ability to patch a file as it is read, we also
+need to keep the original nml_str and a dictionary with all the changes done to mom6_input. We do this by introducing
+a MOM6Input class that extends the dict class.
+"""
+
+from pathlib import Path
+import re
+from io import StringIO
+
+import f90nml
+
+
+def _patch_mom6_input_str(mom6_input_str: str) -> tuple[str, dict]:
+    """Modify the contents of a MOM6 file into a Fortran namelist format readable by f90nml.
+
+    Currently, the "#override" directive is not properly supported. When parsing the file, we will treat variables with
+    this directive as normal variables (i.e., we will pretend the directive is not there), but when writing the file
+    back, the directive will be preserved. This might introduce unexpected changes.
+
+    Also includes fixes for some non-standard things we have come across. In particular:
+       - C style comments (/* This is a comment */). These are added by CESM/CIME. We simply remove them and do not put
+         them back when writing to a file.
+       - "#" before a variable declaration (without the "override"). Some experiments suggest the following behaviour
+         from the MOM6 parser: "# variable = 1" is equivalent to "variable = 1", while "#variable = 1" is equivalent to
+         "!variable = 1". We try to handle them accordingly and to preserve them when writing the file back.
+         (Reference: https://github.com/COSIMA/mom6-panan/commit/80e4a872f2b24f2e41da87439dd342df0c643d00#r130376163)
+
+    The changes are recorded as a "patch", which is a dictionary: the keys are the line numbers where changes
+    were made, while the values are tuples containing a keyword describing the type of change and, optionally, a string.
+
+    :param mom6_input_str: contents of a MOM6 parameter file
+    :return: tuple with the modified contents and the patch dictionary, keyed by 0-based line index
+    """
+
+    # Define several patterns that need to be matched
+    comment_pattern = re.compile(r"/\*.*?\*/", flags=re.DOTALL)
+    zstar_pattern = re.compile(r"Z\*")
+    block_pattern = re.compile(
+        r"KPP%|%KPP|CVMix_CONVECTION%|%CVMix_CONVECTION|CVMIX_DDIFF%|%CVMIX_DDIFF"
+    )
+    override_directive_pattern = re.compile(r"^(#override\s*?)")
+    incorrect_directive_pattern = re.compile(r"^(#\s+)")
+    comment_directive_pattern = re.compile(r"^#(?:(?!override)\w+\b\s*=\s*\w+$)")
+
+    # Modify the input while recording the changes
+    patch = {}
+    output = ""
+    lines = mom6_input_str.split("\n")
+    for i in range(len(lines)):
+        line = lines[i] + "\n"
+        if zstar_pattern.search(line):
+            patch[i] = ("zstar", line)
+            output += zstar_pattern.sub("ZSTAR", line)
+        elif block_pattern.search(line):
+            patch[i] = ("block", line)
+            output += block_pattern.sub("", line)
+        elif override_directive_pattern.search(line):
+            patch[i] = ("override", override_directive_pattern.match(line).group(0))
+            output += override_directive_pattern.sub("", line)
+        elif incorrect_directive_pattern.search(line):
+            patch[i] = (
+                "incorrect directive",
+                incorrect_directive_pattern.match(line).group(0),
+            )
+            output += incorrect_directive_pattern.sub("", line)
+        elif comment_directive_pattern.search(line):
+            patch[i] = ("comment_directive", line)
+            output += "\n"
+        else:
+            output += line
+
+    # Remove all C-style comments. These are not recorded and will not be undone.
+    def replace_comment(match):
+        return "\n" * match.group().count("\n")
+
+    output = comment_pattern.sub(replace_comment, output)
+
+    return output, patch
+
+
+def _unpatch_mom6_input_str(mom6_input_str: str, patch: dict = None) -> str:
+    """Undo the changes that were done to a MOM6 parameter file to make it into a conforming Fortran namelist
+
+    :param mom6_input_str: namelist text; its first line and its last two lines are dropped
+    :param patch: changes recorded by _patch_mom6_input_str, keyed by line index; None means no changes
+    :return: the text with the recorded changes undone
+    """
+    # The default is None, which does not support the membership test used below
+    patch = patch or {}
+    output = ""
+    lines = mom6_input_str.split("\n")[1:-2]
+    for i, text in enumerate(lines):
+        line = text + "\n"
+        if i not in patch:
+            # Unpatched lines only lose their leading whitespace; empty lines are kept as they are
+            output += line.lstrip() if line != "\n" else line
+            continue
+        kind = patch[i][0]
+        if kind in ("block", "comment_directive"):
+            # For these kinds the patch holds the complete original line: write it back verbatim
+            output += patch[i][1]
+        elif kind == "zstar":
+            output += line.replace("ZSTAR", "Z*")
+        elif kind in ("override", "incorrect directive"):
+            output += patch[i][1] + line
+    return output
+
+
+def _mom6_input_str_to_nml_str(mom6_input_str: str) -> str:
+    """Enclose parameter file text in the opening and closing clauses of a namelist group named "mom6".
+
+    :param mom6_input_str: parameter file contents, without namelist clauses
+    :return: the text preceded by a "&mom6" line and followed by a line break and "/"
+    """
+    return "\n".join(("&mom6", mom6_input_str, "/"))
+
+
+def _nml_str_to_mom6_input_str(nml_str: str) -> str:
+    """Strip the first line and the last two lines from namelist text.
+
+    :param nml_str: namelist text, split at line breaks
+    :return: the remaining lines, joined by line breaks
+    """
+    # A single slice discards the leading piece and the two trailing pieces of the split text
+    kept = nml_str.split("\n")[1:-2]
+    return "\n".join(kept)
+
+
+def _mom6_input_to_nml_str(mom6_input: dict) -> str:
+    """Format a dictionary as the text of a Fortran namelist with a single "mom6" group, using f90nml.
+
+    :param mom6_input: parameter names and values
+    :return: the namelist text written by f90nml with the formatting options set below
+    """
+    output_file = StringIO("")
+    nml = f90nml.Namelist({"mom6": mom6_input})
+    nml.uppercase = True
+    nml.false_repr = "False"
+    nml.true_repr = "True"
+    nml.indent = 0
+    nml.write(output_file)
+    return output_file.getvalue()
+
+
+def _nml_str_to_mom6_input(nml_str: str) -> dict:
+    """Parse namelist text with f90nml and return the contents of its "mom6" group.
+
+    :param nml_str: namelist text containing a "mom6" group
+    :return: dictionary with the entries of the "mom6" group
+    """
+    parser = f90nml.Parser()
+    nml = parser.reads(nml_str)
+    nml.uppercase = True
+    return dict(nml.todict()["mom6"])
+
+
+class Mom6Input(dict):
+    """Class to read, store, modify and write a MOM6 parameter file.
+
+    This class is used to enable round-trip parsing of MOM6 parameter files.
+    It overrides dict item access (__getitem__, __setitem__ and __delitem__) to:
+      - store all the keys in upper case
+      - keep track of the values set after the file was read
+
+    It also stores a "patch" that was applied to the mom6_input_str to convert it to a conforming Fortran namelist.
+    This is used to "undo" the changes when writing the file.
+    """
+
+    # Patched contents of the file to make it look like proper f90 namelist
+    _mom6_input_str_patched = None
+
+    # Dictionary containing information that can be used to reconstruct the original file from the output of f90nml
+    _file_patch = {}
+
+    # A record of all the changes done to the dictionary that can be passed to f90nml to do round-trip parsing
+    _nml_patch = None
+
+    def __init__(self, file_name: str = None):
+        """Read and parse a MOM6 parameter file, keeping what is needed to write it back later.
+
+        :param file_name: path of the file to read; FileNotFoundError is raised if it is not an existing file
+        """
+        # Open file and read contents
+        file = Path(file_name)
+        if not file.is_file():
+            raise FileNotFoundError(f"File not found: {file.as_posix()}")
+
+        with open(file, "r") as f:
+            mom6_input_str = f.read()
+
+        # Convert file contents to dictionary
+        self._mom6_input_str_patched, self._file_patch = _patch_mom6_input_str(
+            mom6_input_str
+        )
+        nml_str = _mom6_input_str_to_nml_str(self._mom6_input_str_patched)
+        mom6_input = _nml_str_to_mom6_input(nml_str)
+
+        # Initialize class dictionary
+        super().__init__(mom6_input)
+        self._keys_to_upper()
+
+        # Initialize nml patch
+        self._nml_patch = {"mom6": {}}
+
+    def __setitem__(self, key, value):
+        """Store a value under the upper-cased key and, once _nml_patch is initialised, record it there too.
+
+        :param key: parameter name (a string, in any case)
+        :param value: new value of the parameter
+        :return: None
+        """
+        super().__setitem__(key.upper(), value)
+        if self._nml_patch:
+            self._nml_patch["mom6"][key.upper()] = value
+
+    def __getitem__(self, key):
+        """Return the value stored under the upper-cased key.
+
+        :param key: parameter name (a string, in any case)
+        :return: the stored value
+        """
+        return super().__getitem__(key.upper())
+
+    def __delitem__(self, key):
+        """Remove the entry stored under the upper-cased key.
+
+        :param key: parameter name (a string, in any case)
+        :return: None (the deletion is not recorded in _nml_patch)
+        """
+        super().__delitem__(key.upper())
+
+    def write(self, file: Path):
+        """Write the parameters to a file, starting from the text read at construction plus the recorded changes.
+
+        :param file: path of the file to write
+        :return: None
+        """
+        # Streams to pass to f90nml
+        nml_file = StringIO(_mom6_input_str_to_nml_str(self._mom6_input_str_patched))
+        tmp_file = StringIO("")
+
+        parser = f90nml.Parser()
+        parser.read(nml_file, self._nml_patch, tmp_file)
+        mom6_input_str = _unpatch_mom6_input_str(tmp_file.getvalue(), self._file_patch)
+        file.write_text(mom6_input_str)
+
+    def _keys_to_upper(self):
+        """Re-insert every key that is not already upper case under its upper-cased name.
+
+        :return: None
+        """
+        for key in list(self.keys()):
+            if not key.isupper():
+                self[key.upper()] = self.pop(key)
+
+
+def read_mom6_input(file_name: str) -> Mom6Input:
+    """Read a MOM6 parameter file.
+
+    :param file_name: path of the file to read
+    :return: a Mom6Input object built from the file
+    """
+    return Mom6Input(file_name)
+
+
+def write_mom6_input(mom_input: dict | Mom6Input, file: Path):
+    """Write MOM6 parameters to a file.
+
+    :param mom_input: a Mom6Input, written through its own write() method, or a plain dictionary
+    :param file: path of the file to write
+    :return: None
+    """
+    if isinstance(mom_input, Mom6Input):
+        mom_input.write(file)
+    else:
+        nml_str = _mom6_input_to_nml_str(mom_input)
+        mom6_input_str = _nml_str_to_mom6_input_str(nml_str) + "\n"
+        file.write_text(mom6_input_str)
diff --git a/pyproject.toml b/pyproject.toml
index 2a5f3cb..29031b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,6 +7,7 @@ authors = [
 ]
 dynamic = ["version"]
 dependencies = [
+    "f90nml",
     "ruamel.yaml",
 ]
 
diff --git a/tests/test_mom6_input.py b/tests/test_mom6_input.py
new file mode 100644
index 0000000..f7cddb8
--- /dev/null
+++ b/tests/test_mom6_input.py
@@ -0,0 +1,101 @@
+import pytest
+import filecmp
+
+from utils import MockFile
+from om3utils.mom6_input import Mom6Input, write_mom6_input, read_mom6_input
+
+
+@pytest.fixture()
+def simple_mom6_input():
+    return {  # dictionary counterpart of the simple_mom6_input_file fixture
+        "REGRIDDING_COORDINATE_MODE": "ZSTAR",
+        "N_SMOOTH": 4,
+        "INCORRECT_DIRECTIVE": 2,
+        "IGNORED_DIRECTIVE": 3,
+        "DT": 1800.0,
+        "BOOL": True,
+    }
+
+
+@pytest.fixture()
+def simple_mom6_input_file(tmp_path):
+    file = tmp_path / "simple_mom6_input_file"  # plain "KEY = value" lines, keys in alphabetical order
+    mom6_input_str = """BOOL = True
+DT = 1800.0
+IGNORED_DIRECTIVE = 3
+INCORRECT_DIRECTIVE = 2
+N_SMOOTH = 4
+REGRIDDING_COORDINATE_MODE = 'ZSTAR'
+"""
+    return MockFile(file, mom6_input_str)  # presumably writes the text to `file` - confirm in tests/utils
+
+
+@pytest.fixture()
+def complex_mom6_input_file(tmp_path):
+    file = tmp_path / "complex_mom6_input_file"  # uses the constructs handled by _patch_mom6_input_str
+    mom6_input_str = """
+/* This is a comment
+   spanning two lines */
+REGRIDDING_COORDINATE_MODE = Z*
+KPP%
+N_SMOOTH = 4
+%KPP
+
+#COMMENT_DIRECTIVE = 1
+# INCORRECT_DIRECTIVE = 2
+#override IGNORED_DIRECTIVE = 3
+DT = 1800.0  ! This is a comment
+! This is another comment
+!COMMENTED_VAR = 3
+BOOL = True
+"""
+    return MockFile(file, mom6_input_str)
+
+
+@pytest.fixture()
+def modified_mom6_input_file(tmp_path):
+    file = tmp_path / "modified_mom6_input_file"  # output expected by test_round_trip_mom6_input
+    mom6_input_str = """
+
+
+REGRIDDING_COORDINATE_MODE = Z*
+KPP%
+N_SMOOTH = 4
+%KPP
+
+#COMMENT_DIRECTIVE = 1
+# INCORRECT_DIRECTIVE = 2
+#override IGNORED_DIRECTIVE = 3
+DT = 900.0  ! This is a comment
+! This is another comment
+!COMMENTED_VAR = 3
+BOOL = True
+
+
+added_var = 32
+"""
+    return MockFile(file, mom6_input_str)
+
+
+def test_read_mom6_input(tmp_path, simple_mom6_input, simple_mom6_input_file):
+    mom6_input_from_file = read_mom6_input(file_name=simple_mom6_input_file.file)  # parse the fixture file
+
+    assert mom6_input_from_file == simple_mom6_input
+
+
+def test_write_mom6_input(tmp_path, simple_mom6_input, simple_mom6_input_file):
+    file = tmp_path / "MOM_input"
+    write_mom6_input(simple_mom6_input, file)  # a plain dict is passed here, not a Mom6Input
+
+    assert filecmp.cmp(file, simple_mom6_input_file.file)
+
+
+def test_round_trip_mom6_input(
+    tmp_path, complex_mom6_input_file, modified_mom6_input_file
+):
+    mom6_input_from_file = Mom6Input(file_name=complex_mom6_input_file.file)
+    mom6_input_from_file["dt"] = 900.0  # lower-case key: Mom6Input stores it as "DT"
+    mom6_input_from_file["ADDED_VAR"] = 32  # key that is not present in the fixture file
+    mom6_input_from_file.write(tmp_path / "MOM_input_new")
+
+    assert filecmp.cmp(tmp_path / "MOM_input_new", modified_mom6_input_file.file)