Merge branch 'develop' into feature/issue-237-improve-test-coverage-further-particularly-for-dimension_cleanup
danielfromearth committed Nov 18, 2024
2 parents 38db147 + 3018a6b commit 46801dc
Showing 18 changed files with 615 additions and 582 deletions.
62 changes: 36 additions & 26 deletions .pre-commit-config.yaml
@@ -1,46 +1,56 @@
---
ci:
autoupdate_schedule: "monthly" # Like dependabot
autoupdate_commit_msg: "chore: update pre-commit hooks"
autoupdate_branch: "develop"
autofix_prs: false # Comment "pre-commit.ci autofix" on a PR to trigger

default_language_version:
python: python3.10

repos:
- repo: https://github.com/gitleaks/gitleaks
rev: v8.21.2
hooks:
- id: gitleaks

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: tests(/\w*)*/functional/t/trailing_whitespaces.py|tests/pyreverse/data/.*.html|doc/data/messages/t/trailing-whitespace/bad.py
# Validate format
- id: check-yaml
- id: check-toml
- id: check-json
# Check for common mistakes
- id: check-added-large-files
- id: check-case-conflict
# - id: check-illegal-windows-names # TODO: Enable in next release
- id: check-merge-conflict
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: check-symlinks
- id: check-vcs-permalinks
- id: destroyed-symlinks
- id: detect-private-key
- id: end-of-file-fixer
exclude: |
(?x)^(
tests(/\w*)*/functional/m/missing/missing_final_newline.py|
tests/functional/t/trailing_newlines.py|
doc/data/messages/t/trailing-newlines/bad.py|
)$
- id: mixed-line-ending
- id: no-commit-to-branch # protects `main` by default
- id: debug-statements
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.7.0'
rev: v0.7.3
hooks:
- id: ruff
args: [ "--fix" ]

# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black-jupyter
args: ["--fix", "--exit-non-zero-on-fix"]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.12.1
rev: v1.13.0
hooks:
- id: mypy

# Other Linters
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.35.1
hooks:
- id: yamllint
args: ["-d {extends: relaxed, rules: {line-length: {max: 120}}}"]
stages: [commit, push]
# TODO: Reconsider using the alexjs hook when there is a way to ignore particular warnings and/or files.
# - repo: "https://github.com/mfisher87/alexjs-pre-commit-mirror"
# rev: "v11.0.1" # Use the sha / tag you prefer
# hooks:
# - id: "alex"
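
For reviewers who want to exercise this hook configuration locally, a minimal sketch (assuming the pre-commit package is already installed in the environment):

```python
# Minimal sketch: run the hooks defined in .pre-commit-config.yaml locally.
# Assumes pre-commit is installed (e.g. via pip or poetry).
import subprocess

# Install the git hook scripts so the hooks run on every commit.
subprocess.run(["pre-commit", "install"], check=True)

# Run all configured hooks (gitleaks, pre-commit-hooks, ruff, mypy, yamllint)
# against the whole repository, not just staged files.
subprocess.run(["pre-commit", "run", "--all-files"], check=True)
```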
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -4,9 +4,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Common Changelog](https://common-changelog.org/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.5.0] - 2024-10-28
## Unreleased

### Changed

- Update pre-commit configuration to enable autoupdate and add gitleaks ([#247](https://github.com/nasa/stitchee/pull/247)) ([**@danielfromearth**](https://github.com/danielfromearth))

## [1.5.0] - 2024-11-08

### Changed

- Update tutorial notebook to use PROD instead of UAT and improve readability ([#241](https://github.com/nasa/stitchee/issues/241))([**@danielfromearth**](https://github.com/danielfromearth))

### Added
@@ -20,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [1.4.0] - 2024-08-19

### Changed

- Allow single netCDF file input in addition to single text file listings ([#230](https://github.com/nasa/stitchee/issues/230))([**@danielfromearth**](https://github.com/danielfromearth))

## [1.3.0] - 2024-07-11
3 changes: 0 additions & 3 deletions README.md
@@ -11,9 +11,6 @@
<a href='https://stitchee.readthedocs.io/en/latest/?badge=latest'>
<img src='https://readthedocs.org/projects/stitchee/badge/?version=latest' alt='Documentation Status' />
</a>
<a href="https://github.com/python/black" target="_blank">
<img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style">
</a>
<a href="http://mypy-lang.org/" target="_blank">
<img src="http://www.mypy-lang.org/static/mypy_badge.svg" alt="Mypy checked">
</a>
2 changes: 1 addition & 1 deletion concatenator/attribute_handling.py
@@ -1,5 +1,5 @@
"""Functions for converting "coordinates" in netCDF variable attributes
between paths that reference a group hierarchy and flattened paths.
between paths that reference a group hierarchy and flattened paths.
"""

import json
15 changes: 10 additions & 5 deletions concatenator/dimension_cleanup.py
@@ -53,7 +53,9 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:

# Attributes for the original variable are retrieved.
attrs_contents = get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name=dup_var_name, new_var_name=dim_dup_new, original_dataset=nc_dataset
original_var_name=dup_var_name,
new_var_name=dim_dup_new,
original_dataset=nc_dataset,
)
# for attrname in dup_var.ncattrs():
# if attrname != '_FillValue':
@@ -67,13 +69,11 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:

# Only create a new *Dimension* if it doesn't already exist.
if dim_dup_new not in nc_dataset.dimensions.keys():

# New dimension is created by copying from the duplicated dimension.
nc_dataset.createDimension(dim_dup_new, dim_dup_length)

# Only create a new dimension *Variable* if it existed originally in the NetCDF structure.
if dim_dup in nc_dataset.variables.keys():

# New variable object is created for the renamed, previously duplicated dimension.
new_dup_var[dim_dup_new] = nc_dataset.createVariable(
dim_dup_new,
@@ -82,7 +82,9 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
fill_value=fill_value,
)
dim_var_attr_contents = get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name=dim_dup, new_var_name=dim_dup_new, original_dataset=nc_dataset
original_var_name=dim_dup,
new_var_name=dim_dup_new,
original_dataset=nc_dataset,
)
for attr_name, contents in dim_var_attr_contents.items():
new_dup_var[dim_dup_new].setncattr(attr_name, contents)
@@ -94,7 +96,10 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:

# Replace original *Variable* with new variable with no duplicated dimensions.
new_dup_var[dup_var_name] = nc_dataset.createVariable(
dup_var_name, str(dup_var[:].dtype), tuple(new_dim_list), fill_value=fill_value
dup_var_name,
str(dup_var[:].dtype),
tuple(new_dim_list),
fill_value=fill_value,
)
for attr_name, contents in attrs_contents.items():
new_dup_var[dup_var_name].setncattr(attr_name, contents)
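
Since this PR targets test coverage for dimension_cleanup, a hedged sketch of the behavior under test: netCDF legitimately allows a variable to repeat a dimension (which xarray rejects), and remove_duplicate_dims renames the repeated occurrence. Dataset, variable, and dimension names below are illustrative, not taken from the actual test suite.

```python
# Hedged sketch: build a tiny in-memory netCDF dataset containing a variable
# with a repeated dimension, then hand it to remove_duplicate_dims.
import netCDF4 as nc
import numpy as np

from concatenator.dimension_cleanup import remove_duplicate_dims

ds = nc.Dataset("dup_dims.nc", mode="w", diskless=True)
ds.createDimension("y", 3)

# A square variable such as a covariance matrix legitimately repeats "y".
var = ds.createVariable("cov", "f8", ("y", "y"), fill_value=-9999.0)
var[:] = np.eye(3)

# The cleanup pass should return a dataset in which the second occurrence
# of "y" has been renamed to a new dimension of the same length.
cleaned = remove_duplicate_dims(ds)
print(cleaned.variables["cov"].dimensions)
```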
12 changes: 10 additions & 2 deletions concatenator/harmony/download_worker.py
@@ -13,7 +13,11 @@


def multi_core_download(
urls: list, destination_dir: str, access_token: str, cfg: dict, process_count: int | None = None
urls: list,
destination_dir: str,
access_token: str,
cfg: dict,
process_count: int | None = None,
) -> list[Path]:
"""
A method which automagically scales downloads to the number of CPU
@@ -74,7 +78,11 @@


def _download_worker(
url_queue: queue.Queue, path_list: list, destination_dir: str, access_token: str, cfg: dict
url_queue: queue.Queue,
path_list: list,
destination_dir: str,
access_token: str,
cfg: dict,
) -> None:
"""
A method to be executed in a separate process which processes the url_queue
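
The reformatted signatures above belong to a queue-draining download pattern; a simplified, self-contained sketch of that pattern follows (the worker body is a stand-in, not the real harmony-service-lib download call):

```python
# Simplified sketch: several processes pull URLs from a shared queue until it
# is empty and record where each download would land.
import multiprocessing as mp
import queue


def _worker(url_queue, path_list, destination_dir):
    while True:
        try:
            url = url_queue.get_nowait()
        except queue.Empty:
            break  # Queue drained; this worker is done.
        # Stand-in for the real download; record the would-be local path.
        path_list.append(f"{destination_dir}/{url.rsplit('/', 1)[-1]}")


if __name__ == "__main__":
    manager = mp.Manager()
    url_queue = manager.Queue()
    path_list = manager.list()
    for url in ["https://example.com/a.nc4", "https://example.com/b.nc4"]:
        url_queue.put(url)

    workers = [
        mp.Process(target=_worker, args=(url_queue, path_list, "/tmp"))
        for _ in range(2)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(list(path_list))
```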
14 changes: 11 additions & 3 deletions concatenator/harmony/service_adapter.py
@@ -134,14 +134,22 @@ def process_catalog(self, catalog: pystac.Catalog) -> pystac.Catalog:
# -- Output to STAC catalog --
result.clear_items()
properties = dict(
start_datetime=datetimes["start_datetime"], end_datetime=datetimes["end_datetime"]
start_datetime=datetimes["start_datetime"],
end_datetime=datetimes["end_datetime"],
)

item = Item(
str(uuid4()), bbox_to_geometry(bounding_box), bounding_box, None, properties
str(uuid4()),
bbox_to_geometry(bounding_box),
bounding_box,
None,
properties,
)
asset = Asset(
staged_url, title=filename, media_type="application/x-netcdf4", roles=["data"]
staged_url,
title=filename,
media_type="application/x-netcdf4",
roles=["data"],
)
item.add_asset("data", asset)
result.add_item(item)
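
A hedged sketch of the STAC output step shown in this hunk: a new Item whose temporal extent comes from the merged granules, carrying the staged concatenated file as its single "data" asset. All concrete values here are placeholders.

```python
# Hedged sketch of the pystac Item/Asset construction above.
from uuid import uuid4

from pystac import Asset, Item

bounding_box = [-180.0, -90.0, 180.0, 90.0]
geometry = {
    "type": "Polygon",
    "coordinates": [[[-180.0, -90.0], [180.0, -90.0], [180.0, 90.0],
                     [-180.0, 90.0], [-180.0, -90.0]]],
}
properties = {
    "start_datetime": "2024-01-01T00:00:00Z",
    "end_datetime": "2024-01-02T00:00:00Z",
}

# datetime=None is allowed because start/end datetimes are set in properties.
item = Item(str(uuid4()), geometry, bounding_box, None, properties)
item.add_asset(
    "data",
    Asset("s3://bucket/stitched.nc4", title="stitched.nc4",
          media_type="application/x-netcdf4", roles=["data"]),
)
```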
6 changes: 5 additions & 1 deletion concatenator/harmony/util.py
@@ -1,4 +1,5 @@
"""Misc utility functions"""

from datetime import datetime

from pystac import Asset, Item
@@ -78,7 +79,10 @@ def _get_output_date_range(input_items: list[Item]) -> dict[str, str]:
start_datetime = min(start_datetime, new_start_datetime)
end_datetime = max(end_datetime, new_end_datetime)

return {"start_datetime": start_datetime.isoformat(), "end_datetime": end_datetime.isoformat()}
return {
"start_datetime": start_datetime.isoformat(),
"end_datetime": end_datetime.isoformat(),
}


def _get_item_date_range(item: Item) -> tuple[datetime, datetime]:
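
A small worked example of the reduction in _get_output_date_range: the output range spans the earliest start and the latest end across the input items, serialized with isoformat().

```python
# Worked example of the min/max date-range reduction shown above.
from datetime import datetime, timezone

ranges = [
    (datetime(2024, 1, 1, tzinfo=timezone.utc),
     datetime(2024, 1, 2, tzinfo=timezone.utc)),
    (datetime(2024, 1, 1, 12, tzinfo=timezone.utc),
     datetime(2024, 1, 3, tzinfo=timezone.utc)),
]
start_datetime = min(start for start, _ in ranges)
end_datetime = max(end for _, end in ranges)
print({
    "start_datetime": start_datetime.isoformat(),
    "end_datetime": end_datetime.isoformat(),
})
# {'start_datetime': '2024-01-01T00:00:00+00:00',
#  'end_datetime': '2024-01-03T00:00:00+00:00'}
```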
5 changes: 4 additions & 1 deletion concatenator/run_stitchee.py
@@ -93,7 +93,10 @@ def parse_args(args: list) -> argparse.Namespace:
default="__",
)
parser.add_argument(
"-O", "--overwrite", action="store_true", help="Overwrite output file if it already exists."
"-O",
"--overwrite",
action="store_true",
help="Overwrite output file if it already exists.",
)
parser.add_argument(
"-v",
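
For clarity, the --overwrite flag reformatted above uses argparse's store_true action, which defaults to False and flips to True when the flag is present; a self-contained sketch (program name illustrative):

```python
# Self-contained sketch of the store_true flag wiring shown above.
import argparse

parser = argparse.ArgumentParser(prog="stitchee")
parser.add_argument(
    "-O",
    "--overwrite",
    action="store_true",
    help="Overwrite output file if it already exists.",
)

print(parser.parse_args([]).overwrite)      # False
print(parser.parse_args(["-O"]).overwrite)  # True
```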
7 changes: 5 additions & 2 deletions concatenator/stitchee.py
@@ -88,7 +88,11 @@ def stitchee(
concatenator.group_delim = group_delimiter

intermediate_flat_filepaths: list[str] = []
benchmark_log = {"flattening": 0.0, "concatenating": 0.0, "reconstructing_groups": 0.0}
benchmark_log = {
"flattening": 0.0,
"concatenating": 0.0,
"reconstructing_groups": 0.0,
}

# Proceed to concatenate only files that are workable (can be opened and are not empty).
input_files, num_input_files = validate_workable_files(files_to_concat, logger)
@@ -123,7 +127,6 @@
# Instead of "with nc.Dataset() as" inside the loop, we use a context manager stack.
# This way all files are cleanly closed outside the loop.
with ExitStack() as context_stack:

logger.info("Flattening all input files...")
xrdataset_list = []
concat_dim_order = []
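
A hedged sketch of the ExitStack pattern described in the comment above: datasets opened inside the loop stay open for the whole loop body and are all closed together when the with block exits. File names are placeholders.

```python
# Hedged sketch: tie many datasets' lifetimes to one context-manager stack.
from contextlib import ExitStack

import xarray as xr

input_files = ["granule_1.nc4", "granule_2.nc4"]  # placeholders

with ExitStack() as context_stack:
    xrdataset_list = []
    for filepath in input_files:
        # enter_context registers each dataset for cleanup on the stack.
        ds = context_stack.enter_context(xr.open_dataset(filepath))
        xrdataset_list.append(ds)
    # ... flatten and concatenate while everything is still open ...
# All datasets are closed here, in reverse order of opening.
```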
6 changes: 5 additions & 1 deletion docs/tutorial_examples.ipynb
@@ -417,7 +417,11 @@
" ds_geo = xr.open_dataset(filepath, group=\"geolocation\")\n",
"\n",
" if index < 0:\n",
" X, Y, C = ds_geo.longitude.values, ds_geo.latitude.values, ds_product[product_name].values\n",
" X, Y, C = (\n",
" ds_geo.longitude.values,\n",
" ds_geo.latitude.values,\n",
" ds_product[product_name].values,\n",
" )\n",
" filename = filepath\n",
" else:\n",
" X, Y, C = (\n",
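
A hedged sketch of the plotting cell reformatted above: read coordinates from the "geolocation" group and a product variable from its own group, then scatter-plot. The file path, product group name, and variable name are placeholders, not the tutorial's actual values.

```python
# Hedged sketch of the notebook's X/Y/C plotting logic.
import matplotlib.pyplot as plt
import xarray as xr

filepath = "stitched_output.nc4"       # placeholder
product_name = "vertical_column"       # placeholder

ds_product = xr.open_dataset(filepath, group="product")  # group name assumed
ds_geo = xr.open_dataset(filepath, group="geolocation")

X, Y, C = (
    ds_geo.longitude.values,
    ds_geo.latitude.values,
    ds_product[product_name].values,
)

plt.scatter(X.ravel(), Y.ravel(), c=C.ravel(), s=1)
plt.colorbar(label=product_name)
plt.title(filepath)
plt.show()
```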