Merge branch 'develop' into feature/issue-237-improve-test-coverage-further-particularly-for-dimension_cleanup
danielfromearth committed Nov 18, 2024
2 parents 38db147 + 3018a6b commit 46801dc
Showing 18 changed files with 615 additions and 582 deletions.
62 changes: 36 additions & 26 deletions .pre-commit-config.yaml
@@ -1,46 +1,56 @@
---
ci:
autoupdate_schedule: "monthly" # Like dependabot
autoupdate_commit_msg: "chore: update pre-commit hooks"
autoupdate_branch: "develop"
autofix_prs: false # Comment "pre-commit.ci autofix" on a PR to trigger

default_language_version:
python: python3.10

repos:
- repo: https://github.com/gitleaks/gitleaks
rev: v8.21.2
hooks:
- id: gitleaks

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: tests(/\w*)*/functional/t/trailing_whitespaces.py|tests/pyreverse/data/.*.html|doc/data/messages/t/trailing-whitespace/bad.py
# Validate format
- id: check-yaml
- id: check-toml
- id: check-json
# Check for common mistakes
- id: check-added-large-files
- id: check-case-conflict
# - id: check-illegal-windows-names # TODO: Enable in next release
- id: check-merge-conflict
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
- id: check-symlinks
- id: check-vcs-permalinks
- id: destroyed-symlinks
- id: detect-private-key
- id: end-of-file-fixer
exclude: |
(?x)^(
tests(/\w*)*/functional/m/missing/missing_final_newline.py|
tests/functional/t/trailing_newlines.py|
doc/data/messages/t/trailing-newlines/bad.py|
)$
- id: mixed-line-ending
- id: no-commit-to-branch # protects `main` by default
- id: debug-statements
- id: trailing-whitespace

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.7.0'
rev: v0.7.3
hooks:
- id: ruff
args: [ "--fix" ]

# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black-jupyter
args: ["--fix", "--exit-non-zero-on-fix"]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.12.1
rev: v1.13.0
hooks:
- id: mypy

# Other Linters
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.35.1
hooks:
- id: yamllint
args: ["-d {extends: relaxed, rules: {line-length: {max: 120}}}"]
stages: [commit, push]
# TODO: Reconsider using the alexjs hook when there is a way to ignore particular warnings and/or files.
# - repo: "https://github.com/mfisher87/alexjs-pre-commit-mirror"
# rev: "v11.0.1" # Use the sha / tag you prefer
# hooks:
# - id: "alex"
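
For reviewers who want to exercise this hook configuration locally, a minimal sketch (assuming the pre-commit package is already installed in the environment):

```python
# Minimal sketch: run the hooks defined in .pre-commit-config.yaml locally.
# Assumes pre-commit is installed (e.g. via pip or poetry).
import subprocess

# Install the git hook scripts so the hooks run on every commit.
subprocess.run(["pre-commit", "install"], check=True)

# Run all configured hooks (gitleaks, pre-commit-hooks, ruff, mypy, yamllint)
# against the whole repository, not just staged files.
subprocess.run(["pre-commit", "run", "--all-files"], check=True)
```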
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -4,9 +4,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Common Changelog](https://common-changelog.org/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.5.0] - 2024-10-28
## Unreleased

### Changed

- Update pre-commit configuration to enable autoupdate and add gitleaks ([#247](https://github.com/nasa/stitchee/pull/247)) ([**@danielfromearth**](https://github.com/danielfromearth))

## [1.5.0] - 2024-11-08

### Changed

- Update tutorial notebook to use PROD instead of UAT and improve readability ([#241](https://github.com/nasa/stitchee/issues/241))([**@danielfromearth**](https://github.com/danielfromearth))

### Added
@@ -20,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [1.4.0] - 2024-08-19

### Changed

- Allow single netCDF file input in addition to single text file listings ([#230](https://github.com/nasa/stitchee/issues/230))([**@danielfromearth**](https://github.com/danielfromearth))

## [1.3.0] - 2024-07-11
3 changes: 0 additions & 3 deletions README.md
@@ -11,9 +11,6 @@
<a href='https://stitchee.readthedocs.io/en/latest/?badge=latest'>
<img src='https://readthedocs.org/projects/stitchee/badge/?version=latest' alt='Documentation Status' />
</a>
<a href="https://github.com/python/black" target="_blank">
<img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style">
</a>
<a href="http://mypy-lang.org/" target="_blank">
<img src="http://www.mypy-lang.org/static/mypy_badge.svg" alt="Mypy checked">
</a>
2 changes: 1 addition & 1 deletion concatenator/attribute_handling.py
@@ -1,5 +1,5 @@
"""Functions for converting "coordinates" in netCDF variable attributes
between paths that reference a group hierarchy and flattened paths.
between paths that reference a group hierarchy and flattened paths.
"""

import json
15 changes: 10 additions & 5 deletions concatenator/dimension_cleanup.py
@@ -53,7 +53,9 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:

# Attributes for the original variable are retrieved.
attrs_contents = get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name=dup_var_name, new_var_name=dim_dup_new, original_dataset=nc_dataset
original_var_name=dup_var_name,
new_var_name=dim_dup_new,
original_dataset=nc_dataset,
)
# for attrname in dup_var.ncattrs():
# if attrname != '_FillValue':
@@ -67,13 +69,11 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:

# Only create a new *Dimension* if it doesn't already exist.
if dim_dup_new not in nc_dataset.dimensions.keys():

# New dimension is created by copying from the duplicated dimension.
nc_dataset.createDimension(dim_dup_new, dim_dup_length)

# Only create a new dimension *Variable* if it existed originally in the NetCDF structure.
if dim_dup in nc_dataset.variables.keys():

# New variable object is created for the renamed, previously duplicated dimension.
new_dup_var[dim_dup_new] = nc_dataset.createVariable(
dim_dup_new,
@@ -82,7 +82,9 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
fill_value=fill_value,
)
dim_var_attr_contents = get_attributes_minus_fillvalue_and_renamed_coords(
original_var_name=dim_dup, new_var_name=dim_dup_new, original_dataset=nc_dataset
original_var_name=dim_dup,
new_var_name=dim_dup_new,
original_dataset=nc_dataset,
)
for attr_name, contents in dim_var_attr_contents.items():
new_dup_var[dim_dup_new].setncattr(attr_name, contents)
@@ -94,7 +96,10 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:

# Replace original *Variable* with new variable with no duplicated dimensions.
new_dup_var[dup_var_name] = nc_dataset.createVariable(
dup_var_name, str(dup_var[:].dtype), tuple(new_dim_list), fill_value=fill_value
dup_var_name,
str(dup_var[:].dtype),
tuple(new_dim_list),
fill_value=fill_value,
)
for attr_name, contents in attrs_contents.items():
new_dup_var[dup_var_name].setncattr(attr_name, contents)
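
Since this PR targets test coverage for dimension_cleanup, a hedged sketch of the behavior under test: netCDF legitimately allows a variable to repeat a dimension (which xarray rejects), and remove_duplicate_dims renames the repeated occurrence. Dataset, variable, and dimension names below are illustrative, not taken from the actual test suite.

```python
# Hedged sketch: build a tiny in-memory netCDF dataset containing a variable
# with a repeated dimension, then hand it to remove_duplicate_dims.
import netCDF4 as nc
import numpy as np

from concatenator.dimension_cleanup import remove_duplicate_dims

ds = nc.Dataset("dup_dims.nc", mode="w", diskless=True)
ds.createDimension("y", 3)

# A square variable such as a covariance matrix legitimately repeats "y".
var = ds.createVariable("cov", "f8", ("y", "y"), fill_value=-9999.0)
var[:] = np.eye(3)

# The cleanup pass should return a dataset in which the second occurrence
# of "y" has been renamed to a new dimension of the same length.
cleaned = remove_duplicate_dims(ds)
print(cleaned.variables["cov"].dimensions)
```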
12 changes: 10 additions & 2 deletions concatenator/harmony/download_worker.py
@@ -13,7 +13,11 @@


def multi_core_download(
urls: list, destination_dir: str, access_token: str, cfg: dict, process_count: int | None = None
urls: list,
destination_dir: str,
access_token: str,
cfg: dict,
process_count: int | None = None,
) -> list[Path]:
"""
A method which automagically scales downloads to the number of CPU
@@ -74,7 +78,11 @@


def _download_worker(
url_queue: queue.Queue, path_list: list, destination_dir: str, access_token: str, cfg: dict
url_queue: queue.Queue,
path_list: list,
destination_dir: str,
access_token: str,
cfg: dict,
) -> None:
"""
A method to be executed in a separate process which processes the url_queue
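
The reformatted signatures above belong to a queue-draining download pattern; a simplified, self-contained sketch of that pattern follows (the worker body is a stand-in, not the real harmony-service-lib download call):

```python
# Simplified sketch: several processes pull URLs from a shared queue until it
# is empty and record where each download would land.
import multiprocessing as mp
import queue


def _worker(url_queue, path_list, destination_dir):
    while True:
        try:
            url = url_queue.get_nowait()
        except queue.Empty:
            break  # Queue drained; this worker is done.
        # Stand-in for the real download; record the would-be local path.
        path_list.append(f"{destination_dir}/{url.rsplit('/', 1)[-1]}")


if __name__ == "__main__":
    manager = mp.Manager()
    url_queue = manager.Queue()
    path_list = manager.list()
    for url in ["https://example.com/a.nc4", "https://example.com/b.nc4"]:
        url_queue.put(url)

    workers = [
        mp.Process(target=_worker, args=(url_queue, path_list, "/tmp"))
        for _ in range(2)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(list(path_list))
```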
14 changes: 11 additions & 3 deletions concatenator/harmony/service_adapter.py
@@ -134,14 +134,22 @@ def process_catalog(self, catalog: pystac.Catalog) -> pystac.Catalog:
# -- Output to STAC catalog --
result.clear_items()
properties = dict(
start_datetime=datetimes["start_datetime"], end_datetime=datetimes["end_datetime"]
start_datetime=datetimes["start_datetime"],
end_datetime=datetimes["end_datetime"],
)

item = Item(
str(uuid4()), bbox_to_geometry(bounding_box), bounding_box, None, properties
str(uuid4()),
bbox_to_geometry(bounding_box),
bounding_box,
None,
properties,
)
asset = Asset(
staged_url, title=filename, media_type="application/x-netcdf4", roles=["data"]
staged_url,
title=filename,
media_type="application/x-netcdf4",
roles=["data"],
)
item.add_asset("data", asset)
result.add_item(item)
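
A hedged sketch of the STAC output step shown in this hunk: a new Item whose temporal extent comes from the merged granules, carrying the staged concatenated file as its single "data" asset. All concrete values here are placeholders.

```python
# Hedged sketch of the pystac Item/Asset construction above.
from uuid import uuid4

from pystac import Asset, Item

bounding_box = [-180.0, -90.0, 180.0, 90.0]
geometry = {
    "type": "Polygon",
    "coordinates": [[[-180.0, -90.0], [180.0, -90.0], [180.0, 90.0],
                     [-180.0, 90.0], [-180.0, -90.0]]],
}
properties = {
    "start_datetime": "2024-01-01T00:00:00Z",
    "end_datetime": "2024-01-02T00:00:00Z",
}

# datetime=None is allowed because start/end datetimes are set in properties.
item = Item(str(uuid4()), geometry, bounding_box, None, properties)
item.add_asset(
    "data",
    Asset("s3://bucket/stitched.nc4", title="stitched.nc4",
          media_type="application/x-netcdf4", roles=["data"]),
)
```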
6 changes: 5 additions & 1 deletion concatenator/harmony/util.py
@@ -1,4 +1,5 @@
"""Misc utility functions"""

from datetime import datetime

from pystac import Asset, Item
@@ -78,7 +79,10 @@ def _get_output_date_range(input_items: list[Item]) -> dict[str, str]:
start_datetime = min(start_datetime, new_start_datetime)
end_datetime = max(end_datetime, new_end_datetime)

return {"start_datetime": start_datetime.isoformat(), "end_datetime": end_datetime.isoformat()}
return {
"start_datetime": start_datetime.isoformat(),
"end_datetime": end_datetime.isoformat(),
}


def _get_item_date_range(item: Item) -> tuple[datetime, datetime]:
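
A small worked example of the reduction in _get_output_date_range: the output range spans the earliest start and the latest end across the input items, serialized with isoformat().

```python
# Worked example of the min/max date-range reduction shown above.
from datetime import datetime, timezone

ranges = [
    (datetime(2024, 1, 1, tzinfo=timezone.utc),
     datetime(2024, 1, 2, tzinfo=timezone.utc)),
    (datetime(2024, 1, 1, 12, tzinfo=timezone.utc),
     datetime(2024, 1, 3, tzinfo=timezone.utc)),
]
start_datetime = min(start for start, _ in ranges)
end_datetime = max(end for _, end in ranges)
print({
    "start_datetime": start_datetime.isoformat(),
    "end_datetime": end_datetime.isoformat(),
})
# {'start_datetime': '2024-01-01T00:00:00+00:00',
#  'end_datetime': '2024-01-03T00:00:00+00:00'}
```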
5 changes: 4 additions & 1 deletion concatenator/run_stitchee.py
@@ -93,7 +93,10 @@ def parse_args(args: list) -> argparse.Namespace:
default="__",
)
parser.add_argument(
"-O", "--overwrite", action="store_true", help="Overwrite output file if it already exists."
"-O",
"--overwrite",
action="store_true",
help="Overwrite output file if it already exists.",
)
parser.add_argument(
"-v",
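
For clarity, the --overwrite flag reformatted above uses argparse's store_true action, which defaults to False and flips to True when the flag is present; a self-contained sketch (program name illustrative):

```python
# Self-contained sketch of the store_true flag wiring shown above.
import argparse

parser = argparse.ArgumentParser(prog="stitchee")
parser.add_argument(
    "-O",
    "--overwrite",
    action="store_true",
    help="Overwrite output file if it already exists.",
)

print(parser.parse_args([]).overwrite)      # False
print(parser.parse_args(["-O"]).overwrite)  # True
```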
7 changes: 5 additions & 2 deletions concatenator/stitchee.py
@@ -88,7 +88,11 @@ def stitchee(
concatenator.group_delim = group_delimiter

intermediate_flat_filepaths: list[str] = []
benchmark_log = {"flattening": 0.0, "concatenating": 0.0, "reconstructing_groups": 0.0}
benchmark_log = {
"flattening": 0.0,
"concatenating": 0.0,
"reconstructing_groups": 0.0,
}

# Proceed to concatenate only files that are workable (can be opened and are not empty).
input_files, num_input_files = validate_workable_files(files_to_concat, logger)
@@ -123,7 +127,6 @@
# Instead of "with nc.Dataset() as" inside the loop, we use a context manager stack.
# This way all files are cleanly closed outside the loop.
with ExitStack() as context_stack:

logger.info("Flattening all input files...")
xrdataset_list = []
concat_dim_order = []
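
A hedged sketch of the ExitStack pattern described in the comment above: datasets opened inside the loop stay open for the whole loop body and are all closed together when the with block exits. File names are placeholders.

```python
# Hedged sketch: tie many datasets' lifetimes to one context-manager stack.
from contextlib import ExitStack

import xarray as xr

input_files = ["granule_1.nc4", "granule_2.nc4"]  # placeholders

with ExitStack() as context_stack:
    xrdataset_list = []
    for filepath in input_files:
        # enter_context registers each dataset for cleanup on the stack.
        ds = context_stack.enter_context(xr.open_dataset(filepath))
        xrdataset_list.append(ds)
    # ... flatten and concatenate while everything is still open ...
# All datasets are closed here, in reverse order of opening.
```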
6 changes: 5 additions & 1 deletion docs/tutorial_examples.ipynb
@@ -417,7 +417,11 @@
" ds_geo = xr.open_dataset(filepath, group=\"geolocation\")\n",
"\n",
" if index < 0:\n",
" X, Y, C = ds_geo.longitude.values, ds_geo.latitude.values, ds_product[product_name].values\n",
" X, Y, C = (\n",
" ds_geo.longitude.values,\n",
" ds_geo.latitude.values,\n",
" ds_product[product_name].values,\n",
" )\n",
" filename = filepath\n",
" else:\n",
" X, Y, C = (\n",
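
A hedged sketch of the plotting cell reformatted above: read coordinates from the "geolocation" group and a product variable from its own group, then scatter-plot. The file path, product group name, and variable name are placeholders, not the tutorial's actual values.

```python
# Hedged sketch of the notebook's X/Y/C plotting logic.
import matplotlib.pyplot as plt
import xarray as xr

filepath = "stitched_output.nc4"       # placeholder
product_name = "vertical_column"       # placeholder

ds_product = xr.open_dataset(filepath, group="product")  # group name assumed
ds_geo = xr.open_dataset(filepath, group="geolocation")

X, Y, C = (
    ds_geo.longitude.values,
    ds_geo.latitude.values,
    ds_product[product_name].values,
)

plt.scatter(X.ravel(), Y.ravel(), c=C.ravel(), s=1)
plt.colorbar(label=product_name)
plt.title(filepath)
plt.show()
```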