Skip to content

Commit

Permalink
Merge pull request #188 from ACCESS-Community-Hub/prerelease
Browse files Browse the repository at this point in the history
Prerelease for v1.1.0
  • Loading branch information
paolap authored Oct 17, 2024
2 parents 49651f7 + 6873c75 commit 9442b38
Show file tree
Hide file tree
Showing 78 changed files with 6,369 additions and 3,326 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/mopper-conda-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
activate-environment: mopper_env
channels: conda-forge, coecms
channel-priority: true
environment-file: conda/environment.yaml # Path to the build conda environment
auto-update-conda: false
show-channel-urls: true #
- name: Build and upload the conda packages
uses: uibcdf/[email protected]
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/mopper-test-conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
activate-environment: mopper_env
channels: conda-forge, coecms
channel-priority: true
environment-file: conda/environment.yaml # Path to the build conda environment
auto-update-conda: false
show-channel-urls: true #
- name: Build but do not upload the conda packages
uses: uibcdf/[email protected]
Expand Down
24 changes: 18 additions & 6 deletions ACDD_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
cmor:
# If test true it will just run the setup but not launch the job automatically
test: false
appdir: /g/data/ua8/Working/packages/ACCESS-MOPPeR
# working directory if default uses current directory
appdir: default
# output directory for all generated data (CMORISED files & logs)
# if default it is set to /scratch/$project/$user/MOPPER_OUTPUT<exp>
outpath: default
Expand All @@ -29,6 +30,11 @@ cmor:
access_version: CM2
# reference date for time units (set as 'default' to use start_date)
reference_date: 1970-01-01
# Path and file templates can be changed based on the experiment.
# The example below should be considered a minimum requirement.
# Consider adding 'table_id' if using the "all tables" option to list
# the variables to process, as variables can be present at the same frequency
# in more than one table
path_template: "{product_version}/{frequency}"
# date_range is automatically added at the end of filename
file_template: "{variable_id}_{source_id}_{experiment_id}_{frequency}"
Expand All @@ -41,6 +47,7 @@ cmor:
shuffle: 1
# Variables to CMORise:
# CMOR table/variable to process; default is 'all'.
# 'all' will use all the tables listed in the mapping file
# Or create a yaml file listing variables to process (VAR_SUBSET[_LIST]).
# each line: <table: [var1, var2, var3 ..]>
tables: CMIP6_Amon
Expand Down Expand Up @@ -81,16 +88,21 @@ cmor:
_AXIS_ENTRY_FILE: "ACDD_coordinate.json"
_FORMULA_VAR_FILE: "ACDD_formula_terms.json"
grids: "ACDD_grids.json"
# Additional NCI information:
# Additional NCI information:
# NCI project to charge compute; $PROJECT = your default project
project: v45
# additional NCI projects to be included in the storage flags
# additional NCI projects to be included in the storage flags, comma separated list
addprojs: []
# queue and memory (GB) per CPU (depends on queue),
# hugemem is reccomended for high reoslution data and/or derived variables
# hugemem is recommended for high resolution data and/or derived variables
# hugemem requires a minimum of 6 cpus; this is handled by the code
queue: hugemem
mem_per_cpu: 32
max_cpus: 24
# Mopper uses multiprocessing to produce files in parallel, usually 1 cpu per worker
# is a good compromise, occasionally you might want to pass a higher number
# if running out of memory
cpuxworker: 1
# walltime in "hh:mm:ss"
walltime: '8:00:00'
mode: custom
Expand All @@ -99,7 +111,7 @@ cmor:
# you can override that by supplying the env to pass to "source"
# Ex
# conda_env: <custom-env-path>/bin/activate
# or you can set "test: true" and modify mopper_job.sh manually
# to allow other settings use "test: true" and modify mopper_job.sh manually
conda_env: default

#
Expand Down Expand Up @@ -174,4 +186,4 @@ attrs:
parent: !!bool false
# CMOR will add a tracking_id if you want to define a prefix add here
tracking_id_prefix:
comment: "post-processed using ACCESS-MOPPeR v1.0.0 https://doi.org/10.5281/zenodo.12747219"
comment: "post-processed using ACCESS-MOPPeR v1.1.0 https://doi.org/10.5281/zenodo.13841181"
22 changes: 17 additions & 5 deletions CMIP6_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
cmor:
# If test true it will just run the setup but not launch the job automatically
test: false
appdir: /g/data/ua8/Working/packages/ACCESS-MOPPeR
# working directory if default uses current directory
appdir: default
# output directory for all generated data (CMORISED files & logs)
# if default it is set to /scratch/$project/$user/MOPPER_OUTPUT<exp>
outpath: default
Expand Down Expand Up @@ -38,6 +39,7 @@ cmor:

# Variables to CMORise:
# CMOR table/variable to process; default is 'all'.
# 'all' will use all the tables listed in the mapping file
# Or create a yaml file listing variables to process (VAR_SUBSET[_LIST]).
# each line: <table: [var1, var2, var3 ..]>
tables: CMIP6_Amon
Expand Down Expand Up @@ -79,19 +81,29 @@ cmor:
grids: CMIP6_grids.json
# Additional NCI information:
# NCI project to charge compute; $PROJECT = your default project
# NCI queue to use; hugemem is recommended
project: v45
# additional NCI projects to be included in the storage flags
# additional NCI projects to be included in the storage flags, comma separated list
addprojs: []
# queue and memory (GB) per CPU (depends on queue)
# hugemem is reccomended for high reoslution data and/or derived variables
# hugemem is recommended for high resolution data and/or derived variables
# hugemem requires a minimum of 6 cpus; this is handled by the code
queue: hugemem
mem_per_cpu: 30
max_cpus: 24
# Mopper uses multiprocessing to produce files in parallel, usually 1 cpu per worker
# is a good compromise, occasionally you might want to pass a higher number
# if running out of memory
cpuxworker: 1
# walltime in "hh:mm:ss"
walltime: '8:00:00'
mode: cmip6
# if default uses hh5 conda env
# conda_env to use by default hh5 analysis3-unstable
# as this has the code and all dependencies installed
# you can override that by supplying the env to pass to "source"
# Ex
# conda_env: <custom-env-path>/bin/activate
# to allow other settings use "test: true" and modify mopper_job.sh manually
conda_env: default

# Global attributes: these will be added to each files comment unwanted ones
Expand Down Expand Up @@ -163,4 +175,4 @@ attrs:
#CMOR will add a tracking_id if you want to define a prefix add here
tracking_id_prefix:
Conventions: "CF-1.7 CMIP-6.2"
comment: "post-processed using ACCESS-MOPPeR v1.0.0 https://doi.org/10.5281/zenodo.12747219"
comment: "post-processed using ACCESS-MOPPeR v1.1.0 https://doi.org/10.5281/zenodo.13841181"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# [ACCESS Model Output Post-Processor (MOPPeR)](https://access-mopper.readthedocs.io/en/latest)
[![Read the docs](https://readthedocs.org/projects/access-mopper/badge/?version=latest)](https://access-mopper.readthedocs.io/en/latest/)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12747219.svg)](https://doi.org/10.5281/zenodo.12747219)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13841181.svg)](https://doi.org/10.5281/zenodo.13841181)

This code is derived from the [APP4](https://doi.org/10.5281/zenodo.7703469), initially created by Peter Uhe for CMIP5, and further developed for CMIP6-era by Chloe Mackallah from CSIRO, O&A Aspendale.

Expand Down Expand Up @@ -36,6 +36,6 @@ If you want to install an unstable version or a different branch:
MOPPeR is pre-installed into a Conda environment at NCI. Load it with::

module use /g/data3/hh5/public/modules
module load conda/analysis3
module load conda/analysis3-unstable

NB. You need to be a member of the hh5 project to load the modules.
2 changes: 0 additions & 2 deletions conda/environment.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
name: mopper_env
channels:
- conda-forge
- coecms
- default

dependencies:
- anaconda-client
Expand Down
21 changes: 7 additions & 14 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
package:
name: mopper
#version: "{{ environ.get('GIT_DESCRIBE_TAG', '1.0') }}"
version: '1.0.0'
version: '1.1.0'
#source:
# path: ./

source:
#url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz
git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git
#git_tag: prerelease
git_rev: "1.0.0"
git_tag: prerelease
#git_rev: "1.1.0"
#git_depth: 1 # (Defaults to -1/not shallow)
#path: ../.

build:
number: 1
Expand All @@ -27,29 +28,21 @@ requirements:
run:
- python
- click
- xarray>=2024.05.0
- cmor
- xarray
- numpy
- dask
- pyyaml
- cftime
- python-dateutil
- metpy
- gsw

test:
#imports:
# - mopdb
# - mopper
source_files:
- tests
- tests/testdata
requires:
- cmor
- xarray
- numpy
- dask
- pyyaml
- cftime
- python-dateutil
- pytest
- pyfakefs
- coverage
Expand Down
7 changes: 5 additions & 2 deletions conda/testenv.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
name: testenv
channels:
- conda-forge
- nodefaults

dependencies:
- cmor
- click
- xarray
- xarray>=2024.05.0
- cmor
- numpy
- dask
- pyyaml
- cftime
- python-dateutil
- metpy
- gsw
- pytest
- coverage
- codecov
Expand Down
78 changes: 78 additions & 0 deletions docs/calculations.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
Calculating derived variables
=============================

Calculations are used to derive a variable from one or multiple inputs, to resample a variable to a new frequency or generally to modify a variable so it will match fully the corresponding definition in a CMOR table.

How calculations work
---------------------
Calculations are defined in the mapping file under the field by the same name. The `calculation` string gets literally evaluated by the tool using the Python eval() function.
As an example, a
simple calculation could be summing a variable across all its vertical levels:

.. code-block:: bash
mrso;fld_s08i223;var[0].sum(dim='depth')
`var` represents the list of input variables; in this case there's only one, which is var[0] in the calculation string. In this case the calculation is very simple and can be fully defined in the mapping itself. If the calculation is more complex it's easier to use a pre-defined function, for example:

.. code-block:: bash
hus24;fld_s00i010 fld_s00i408;plevinterp(var[0], var[1], 24)
Here plevinterp is called to interpolate specific humidity from model levels to pressure levels, this function takes three input arguments, the variable to interpolate, pressure at model levels and finally the number of pressure levels, which corresponds to a specific definition of the pressure levels coordinate.
Already available functions are listed below.

.. note::

When more than one variable is used as input, if the variables are not all in the same file, more than one file pattern can be specified in the mapping row.

Resample
^^^^^^^^
If a variable is available in the raw model output but not at the desired frequency, the tool will try to see if a higher frequency is available to be resampled. For example, if a user is interested in daily surface temperature but this is available only as hourly data, during the `mop setup` phase the tool will add a `resample` attribute with value 'D' to the variable and this will be used as the argument for the resample function. Which kind of statistic to use for the function is defined based on the `timeshot` attribute, so if a variable is defined as a maximum, minimum or sum these are used in the resample instead of the mean.

Contributing
------------
TBA


Available functions
-------------------

Atmosphere and aerosol
^^^^^^^^^^^^^^^^^^^^^^
.. automodule:: mopper.calc_atmos
:members:
:undoc-members:
:show-inheritance:


Ocean
^^^^^
.. automodule:: mopper.calc_ocean
:members:
:undoc-members:
:show-inheritance:


SeaIce
^^^^^^
.. automodule:: mopper.calc_seaice
:members:
:undoc-members:
:show-inheritance:


Land
^^^^
.. automodule:: mopper.calc_land
:members:
:undoc-members:
:show-inheritance:


Other
^^^^^
.. automodule:: mopper.calc_utils
:members:
:undoc-members:
:show-inheritance:
2 changes: 1 addition & 1 deletion docs/cmor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ There are custom tables for CM2 variables not yet included in the CMIP6 tables a
Experiment input file
---------------------

This provides user-supplied metadata and configuration directives used by CMOR, in cluding which controlled vocabulary (CV), grids and coordinate definitions to use and values for the attributes describing the model and simulation.
This provides user-supplied metadata and configuration directives used by CMOR, including which controlled vocabulary (CV), grids and coordinate definitions to use and values for the attributes describing the model and simulation.

We simplified this process so the user only has to pass one configuration file to control all the necessary inputs.
The `mop setup` command will then create an experiment file as expected by CMOR based on this and the selected CV file. This is described in the :ref:`Getting started section <conf-file>`.
Expand Down
26 changes: 20 additions & 6 deletions docs/cmor_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ cmor:
# shuffle 0: off 1:on Shuffle reduces size without impacting speed
deflate_level: 4
shuffle: 1
# Variables to CMORise:
# CMOR table/variable to process; default is 'all'.
# Variables to CMORise:
# CMOR table/variable to process; default is 'all'.
# 'all' will use all the tables listed in the mapping file
# Or create a yaml file listing variables to process (VAR_SUBSET[_LIST]).
# each line: <table: [var1, var2, var3 ..]>
tables: CMIP6_Amon
Expand Down Expand Up @@ -85,12 +86,25 @@ cmor:
# NCI project to charge compute; $PROJECT = your default project
# NCI queue to use; hugemem is recommended
project: v45
# additional NCI projects to be included in the storage flags
# additional NCI projects to be included in the storage flags, comma separated list
addprojs: []
# queue and memory (GB) per CPU (depends on queue)
# queue and memory (GB) per CPU (depends on queue),
# hugemem is recommended for high resolution data and/or derived variables
# hugemem requires a minimum of 6 cpus; this is handled by the code
queue: hugemem
mem_per_cpu: 32
mem_per_cpu: 32
max_cpus: 24
# Mopper uses multiprocessing to produce files in parallel, usually 1 cpu per worker
# is a good compromise, occasionally you might want to pass a higher number
# if running out of memory
cpuxworker: 1
# walltime in "hh:mm:ss"
walltime: '8:00:00'
mode: custom
conda_env: /g/data/.../mopper_env/bin/activate
# conda_env to use by default hh5 analysis3-unstable
# as this has the code and all dependencies installed
# you can override that by supplying the env to pass to "source"
# Ex
# conda_env: <custom-env-path>/bin/activate
# to allow other settings use "test: true" and modify mopper_job.sh manually
conda_env: default
Loading

0 comments on commit 9442b38

Please sign in to comment.