From 3b5578bfa2b9b1b0865f99bce92901cd6d771573 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 9 Aug 2024 09:03:20 -0400 Subject: [PATCH 01/31] matplotlib >=3.8 --- environment.yml | 2 +- mhkit/wave/contours.py | 8 +------- requirements.txt | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/environment.yml b/environment.yml index ac679176d..ee1b37bf0 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - pandas>=1.0.0 - numpy>=1.21.0, <2.0.0 - scipy<=1.13.1 - - matplotlib + - matplotlib>=3.8.0 - requests - lxml - scikit-learn diff --git a/mhkit/wave/contours.py b/mhkit/wave/contours.py index 905c560b8..b2a054c4c 100644 --- a/mhkit/wave/contours.py +++ b/mhkit/wave/contours.py @@ -8,11 +8,8 @@ import numpy as np import warnings from mhkit.utils import to_numeric_array - import matplotlib -mpl_version = tuple(map(int, matplotlib.__version__.split("."))) - # Contours def environmental_contours(x1, x2, sea_state_duration, return_period, method, **kwargs): @@ -1696,10 +1693,7 @@ def _bivariate_KDE(x1, x2, bw, fit, nb_steps, Ndata_bivariate_KDE, kwargs): x1_bivariate_KDE = [] x2_bivariate_KDE = [] - if mpl_version < (3, 8): # For versions before 3.8 - segments = vals.allsegs[0] - else: - segments = [path.vertices for path in vals.get_paths()] + segments = [path.vertices for path in vals.get_paths()] for seg in segments: x1_bivariate_KDE.append(seg[:, 1]) diff --git a/requirements.txt b/requirements.txt index 381f1068f..1f68d7614 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pandas>=1.0.0 numpy>=1.21.0, <2.0.0 scipy<=1.13.1 -matplotlib +matplotlib>=3.8.0 requests pecos>=0.3.0 fatpack diff --git a/setup.py b/setup.py index c30ff2e9f..8d2825b3b 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ "pandas>=1.0.0", "numpy>=1.21.0, <2.0.0", "scipy<=1.13.1", - "matplotlib", + "matplotlib>=3.8.0", "requests", "pecos>=0.3.0", "fatpack", From 8b8d54bc381a728aa84dd5126568121e02317311 Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 4 Sep 2024 10:08:21 -0400 Subject: [PATCH 02/31] lint utils --- .github/workflows/pylint.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 08458f95d..d5cca43e5 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -28,3 +28,7 @@ jobs: - name: Run Pylint on mhkit/power/ run: | pylint mhkit/power/ + + - name: Run Pylint on mhkit/utils/ + run: | + pylint mhkit/utils/ From e1196e1bc1f446c57a779743bc3065cac352790d Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 4 Sep 2024 10:17:08 -0400 Subject: [PATCH 03/31] 10 lint coverage --- mhkit/utils/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index d20d4270b..b484862b0 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -1,3 +1,9 @@ +""" +This module initializes and imports the essential utility functions for data +conversion, statistical analysis, caching, and event detection for the +MHKiT library. 
+""" + from .time_utils import matlab_to_datetime, excel_to_datetime, index_to_datetime from .stat_utils import ( get_statistics, @@ -15,4 +21,5 @@ convert_nested_dict_and_pandas, ) +# pylint: disable=invalid-name _matlab = False # Private variable indicating if mhkit is run through matlab From f4ec0092f340707617e1ebeb55a8d07acc24e27d Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 4 Sep 2024 11:48:29 -0400 Subject: [PATCH 04/31] reduce handle_caching to 5 inputs --- mhkit/river/io/usgs.py | 12 +- mhkit/tests/utils/test_cache.py | 26 ++- mhkit/tidal/io/noaa.py | 8 +- mhkit/utils/cache.py | 301 ++++++++++++++++++------- mhkit/wave/io/cdip.py | 28 ++- mhkit/wave/io/hindcast/hindcast.py | 24 +- mhkit/wave/io/hindcast/wind_toolkit.py | 13 +- mhkit/wave/io/ndbc.py | 31 ++- 8 files changed, 337 insertions(+), 106 deletions(-) diff --git a/mhkit/river/io/usgs.py b/mhkit/river/io/usgs.py index 9b104f826..35ca11ecf 100644 --- a/mhkit/river/io/usgs.py +++ b/mhkit/river/io/usgs.py @@ -120,7 +120,10 @@ def request_usgs_data( # Use handle_caching to manage cache cached_data, metadata, cache_filepath = handle_caching( - hash_params, cache_dir, write_json, clear_cache + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": write_json}, + clear_cache_file=clear_cache, ) if cached_data is not None: @@ -165,7 +168,12 @@ def request_usgs_data( # After making the API request and processing the response, write the # response to a cache file - handle_caching(hash_params, cache_dir, data=data, clear_cache_file=clear_cache) + handle_caching( + hash_params, + cache_dir, + cache_content={"data": data, "metadata": None, "write_json": None}, + clear_cache_file=clear_cache, + ) if write_json: shutil.copy(cache_filepath, write_json) diff --git a/mhkit/tests/utils/test_cache.py b/mhkit/tests/utils/test_cache.py index 14aae0802..3cd5fff43 100644 --- a/mhkit/tests/utils/test_cache.py +++ b/mhkit/tests/utils/test_cache.py @@ -93,7 +93,11 @@ def test_handle_caching_creates_cache(self): Asserts: - The cache file is successfully created at the expected file path. """ - handle_caching(self.hash_params, self.cache_dir, data=self.data) + handle_caching( + self.hash_params, + self.cache_dir, + cache_content={"data": self.data, "metadata": None, "write_json": None}, + ) cache_filename = ( hashlib.md5(self.hash_params.encode("utf-8")).hexdigest() + ".json" @@ -114,8 +118,18 @@ def test_handle_caching_retrieves_data(self): Asserts: - The retrieved data matches the original sample DataFrame. """ - handle_caching(self.hash_params, self.cache_dir, data=self.data) - retrieved_data, _, _ = handle_caching(self.hash_params, self.cache_dir) + handle_caching( + self.hash_params, + self.cache_dir, + cache_content={"data": self.data, "metadata": None, "write_json": None}, + ) + + retrieved_data, _, _ = handle_caching( + self.hash_params, + self.cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + ) + pd.testing.assert_frame_equal(self.data, retrieved_data, check_freq=False) def test_handle_caching_cdip_file_extension(self): @@ -132,7 +146,11 @@ def test_handle_caching_cdip_file_extension(self): - The cache file with a ".pkl" extension is successfully created at the expected file path. 
""" cache_dir = os.path.join(self.cache_dir, "cdip") - handle_caching(self.hash_params, cache_dir, data=self.data) + handle_caching( + self.hash_params, + cache_dir, + cache_content={"data": self.data, "metadata": None, "write_json": None}, + ) cache_filename = ( hashlib.md5(self.hash_params.encode("utf-8")).hexdigest() + ".pkl" diff --git a/mhkit/tidal/io/noaa.py b/mhkit/tidal/io/noaa.py index d0aadc861..2ab8a1d2a 100644 --- a/mhkit/tidal/io/noaa.py +++ b/mhkit/tidal/io/noaa.py @@ -124,7 +124,10 @@ def request_noaa_data( # Use handle_caching to manage cache cached_data, cached_metadata, cache_filepath = handle_caching( - hash_params, cache_dir, write_json=write_json, clear_cache_file=clear_cache + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": write_json}, + clear_cache_file=clear_cache, ) if cached_data is not None: @@ -205,8 +208,7 @@ def request_noaa_data( handle_caching( hash_params, cache_dir, - data=data, - metadata=metadata, + cache_content={"data": data, "metadata": metadata, "write_json": None}, clear_cache_file=clear_cache, ) diff --git a/mhkit/utils/cache.py b/mhkit/utils/cache.py index 423a12757..de30a4e7e 100644 --- a/mhkit/utils/cache.py +++ b/mhkit/utils/cache.py @@ -42,18 +42,163 @@ import hashlib import json import os -import re + +# import re import shutil import pickle import pandas as pd +# def old_handle_caching( +# hash_params, +# cache_dir, +# data=None, +# metadata=None, +# write_json=None, +# clear_cache_file=False, +# ): +# """ +# Handles caching of data to avoid redundant network requests or +# computations. + +# The function checks if a cache file exists for the given parameters. +# If it does, the function will load data from the cache file, unless +# the `clear_cache_file` parameter is set to `True`, in which case the +# cache file is cleared. If the cache file does not exist and the +# `data` parameter is not `None`, the function will store the +# provided data in a cache file. + +# Parameters +# ---------- +# hash_params : str +# The parameters to be hashed and used as the filename for the cache file. +# cache_dir : str +# The directory where the cache files are stored. +# data : pandas DataFrame or None +# The data to be stored in the cache file. If `None`, the function +# will attempt to load data from the cache file. +# metadata : dict or None +# Metadata associated with the data. This will be stored in the +# cache file along with the data. +# write_json : str or None +# If specified, the cache file will be copied to a file with this name. +# clear_cache_file : bool +# If `True`, the cache file for the given parameters will be cleared. + +# Returns +# ------- +# data : pandas DataFrame or None +# The data loaded from the cache file. If data was provided as a +# parameter, the same data will be returned. If the cache file +# does not exist and no data was provided, `None` will be returned. +# metadata : dict or None +# The metadata loaded from the cache file. If metadata was provided +# as a parameter, the same metadata will be returned. If the cache +# file does not exist and no metadata was provided, `None` will be +# returned. +# cache_filepath : str +# The path to the cache file. 
+# """ + +# # Check if 'cdip' is in cache_dir, then use .pkl instead of .json +# file_extension = ( +# ".pkl" +# if "cdip" in cache_dir or "hindcast" in cache_dir or "ndbc" in cache_dir +# else ".json" +# ) + +# # Make cache directory if it doesn't exist +# if not os.path.isdir(cache_dir): +# os.makedirs(cache_dir) + +# # Create a unique filename based on the function parameters +# cache_filename = ( +# hashlib.md5(hash_params.encode("utf-8")).hexdigest() + file_extension +# ) +# cache_filepath = os.path.join(cache_dir, cache_filename) + +# # If clear_cache_file is True, remove the cache file for this request +# if clear_cache_file and os.path.isfile(cache_filepath): +# os.remove(cache_filepath) +# print(f"Cleared cache for {cache_filepath}") + +# # If a cached file exists, load and return the data from the file +# if os.path.isfile(cache_filepath) and data is None: +# if file_extension == ".json": +# with open(cache_filepath, encoding="utf-8") as f: +# json_data = json.load(f) + +# # Extract metadata if it exists +# if "metadata" in json_data: +# metadata = json_data.pop("metadata", None) + +# # Check if index is datetime formatted +# if all( +# re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", str(dt)) +# for dt in json_data["index"] +# ): +# data = pd.DataFrame( +# json_data["data"], +# index=pd.to_datetime(json_data["index"]), +# columns=json_data["columns"], +# ) +# else: +# data = pd.DataFrame( +# json_data["data"], +# index=json_data["index"], +# columns=json_data["columns"], +# ) + +# # Convert the rest to DataFrame +# data = pd.DataFrame( +# json_data["data"], +# index=pd.to_datetime(json_data["index"]), +# columns=json_data["columns"], +# ) + +# elif file_extension == ".pkl": +# with open(cache_filepath, "rb") as f: +# data, metadata = pickle.load(f) + +# if write_json: +# shutil.copy(cache_filepath, write_json) + +# return data, metadata, cache_filepath + +# # If a cached file does not exist and data is provided, +# # store the data in a cache file +# if data is not None: +# if file_extension == ".json": +# # Convert DataFrame to python dict +# py_data = data.to_dict(orient="split") +# # Add metadata to py_data +# py_data["metadata"] = metadata +# # Check if index is datetime indexed +# if isinstance(data.index, pd.DatetimeIndex): +# py_data["index"] = [ +# dt.strftime("%Y-%m-%d %H:%M:%S") for dt in py_data["index"] +# ] +# else: +# py_data["index"] = list(data.index) +# with open(cache_filepath, "w", encoding="utf-8") as f: +# json.dump(py_data, f) + +# elif file_extension == ".pkl": +# with open(cache_filepath, "wb") as f: +# pickle.dump((data, metadata), f) + +# if write_json: +# shutil.copy(cache_filepath, write_json) + +# return data, metadata, cache_filepath +# # If data is not provided and the cache file doesn't exist, return cache_filepath +# return None, None, cache_filepath + + def handle_caching( hash_params, cache_dir, - data=None, - metadata=None, - write_json=None, + cache_content=None, clear_cache_file=False, ): """ @@ -73,14 +218,10 @@ def handle_caching( The parameters to be hashed and used as the filename for the cache file. cache_dir : str The directory where the cache files are stored. - data : pandas DataFrame or None - The data to be stored in the cache file. If `None`, the function + cache_content : dict or None + Dictionary containing 'data' (pandas DataFrame or None), 'metadata' + (dict or None), and 'write_json' (str or None). If `None`, the function will attempt to load data from the cache file. 
- metadata : dict or None - Metadata associated with the data. This will be stored in the - cache file along with the data. - write_json : str or None - If specified, the cache file will be copied to a file with this name. clear_cache_file : bool If `True`, the cache file for the given parameters will be cleared. @@ -99,98 +240,92 @@ def handle_caching( The path to the cache file. """ - # Check if 'cdip' is in cache_dir, then use .pkl instead of .json - file_extension = ( - ".pkl" - if "cdip" in cache_dir or "hindcast" in cache_dir or "ndbc" in cache_dir - else ".json" - ) - - # Make cache directory if it doesn't exist - if not os.path.isdir(cache_dir): - os.makedirs(cache_dir) - - # Create a unique filename based on the function parameters - cache_filename = ( - hashlib.md5(hash_params.encode("utf-8")).hexdigest() + file_extension - ) - cache_filepath = os.path.join(cache_dir, cache_filename) - - # If clear_cache_file is True, remove the cache file for this request - if clear_cache_file and os.path.isfile(cache_filepath): - os.remove(cache_filepath) - print(f"Cleared cache for {cache_filepath}") - - # If a cached file exists, load and return the data from the file - if os.path.isfile(cache_filepath) and data is None: + # Initialize data and metadata to None to avoid pylint errors + data = None + metadata = None + + def _generate_cache_filepath(): + """Generates the cache file path based on the hashed parameters.""" + file_extension = ( + ".pkl" + if "cdip" in cache_dir or "hindcast" in cache_dir or "ndbc" in cache_dir + else ".json" + ) + cache_filename = ( + hashlib.md5(hash_params.encode("utf-8")).hexdigest() + file_extension + ) + return os.path.join(cache_dir, cache_filename), file_extension + + def _clear_cache(cache_filepath): + """Clear the cache file if requested.""" + if clear_cache_file and os.path.isfile(cache_filepath): + os.remove(cache_filepath) + print(f"Cleared cache for {cache_filepath}") + + def _load_cache(file_extension, cache_filepath): + """Load data from the cache file based on its extension.""" + nonlocal data, metadata # Specify that these are outer variables if file_extension == ".json": with open(cache_filepath, encoding="utf-8") as f: - jsonData = json.load(f) - - # Extract metadata if it exists - if "metadata" in jsonData: - metadata = jsonData.pop("metadata", None) - - # Check if index is datetime formatted - if all( - re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", str(dt)) - for dt in jsonData["index"] - ): - data = pd.DataFrame( - jsonData["data"], - index=pd.to_datetime(jsonData["index"]), - columns=jsonData["columns"], - ) - else: - data = pd.DataFrame( - jsonData["data"], - index=jsonData["index"], - columns=jsonData["columns"], - ) + json_data = json.load(f) + + metadata = json_data.pop("metadata", None) - # Convert the rest to DataFrame data = pd.DataFrame( - jsonData["data"], - index=pd.to_datetime(jsonData["index"]), - columns=jsonData["columns"], + json_data["data"], + index=pd.to_datetime(json_data["index"]), + columns=json_data["columns"], ) - elif file_extension == ".pkl": with open(cache_filepath, "rb") as f: data, metadata = pickle.load(f) - if write_json: - shutil.copy(cache_filepath, write_json) - - return data, metadata, cache_filepath + return data, metadata - # If a cached file does not exist and data is provided, - # store the data in a cache file - elif data is not None: + def _write_cache(data, metadata, file_extension, cache_filepath): + """Store data in the cache file based on the extension.""" if file_extension == ".json": - # 
Convert DataFrame to python dict - pyData = data.to_dict(orient="split") - # Add metadata to pyData - pyData["metadata"] = metadata - # Check if index is datetime indexed + py_data = data.to_dict(orient="split") + py_data["metadata"] = metadata if isinstance(data.index, pd.DatetimeIndex): - pyData["index"] = [ - dt.strftime("%Y-%m-%d %H:%M:%S") for dt in pyData["index"] + py_data["index"] = [ + dt.strftime("%Y-%m-%d %H:%M:%S") for dt in py_data["index"] ] else: - pyData["index"] = list(data.index) + py_data["index"] = list(data.index) with open(cache_filepath, "w", encoding="utf-8") as f: - json.dump(pyData, f) - + json.dump(py_data, f) elif file_extension == ".pkl": with open(cache_filepath, "wb") as f: pickle.dump((data, metadata), f) - if write_json: - shutil.copy(cache_filepath, write_json) + # Create the cache directory if it doesn't exist + if not os.path.isdir(cache_dir): + os.makedirs(cache_dir) + + # Generate cache filepath and extension + cache_filepath, file_extension = _generate_cache_filepath() + + # Clear cache if requested + _clear_cache(cache_filepath) + + # Check if cache file exists and load if no data provided + if os.path.isfile(cache_filepath) and cache_content is None: + return _load_cache(file_extension, cache_filepath) + (cache_filepath,) + + # Store data in cache if provided + if cache_content and cache_content["data"] is not None: + _write_cache( + cache_content["data"], + cache_content["metadata"], + file_extension, + cache_filepath, + ) + if cache_content["write_json"]: + shutil.copy(cache_filepath, cache_content["write_json"]) + + return cache_content["data"], cache_content["metadata"], cache_filepath - return data, metadata, cache_filepath - # If data is not provided and the cache file doesn't exist, return cache_filepath return None, None, cache_filepath diff --git a/mhkit/wave/io/cdip.py b/mhkit/wave/io/cdip.py index 5fb6e34f3..92a1d47e6 100644 --- a/mhkit/wave/io/cdip.py +++ b/mhkit/wave/io/cdip.py @@ -324,7 +324,11 @@ def request_parse_workflow( if not multiyear: # Check the cache first hash_params = f"{station_number}-{parameters}-{start_date}-{end_date}" - data = handle_caching(hash_params, cache_dir) + data, _, _ = handle_caching( + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + ) if data[:2] == (None, None): data = get_netcdf_variables( @@ -335,7 +339,11 @@ def request_parse_workflow( all_2D_variables=all_2D_variables, silent=silent, ) - handle_caching(hash_params, cache_dir, data=data) + handle_caching( + hash_params, + cache_dir, + cache_content={"data": data, "metadata": None, "write_json": None}, + ) else: data = data[0] @@ -348,7 +356,11 @@ def request_parse_workflow( # Check the cache for each individual year hash_params = f"{station_number}-{parameters}-{start_date}-{end_date}" - year_data = handle_caching(hash_params, cache_dir) + year_data, _, _ = handle_caching( + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + ) if year_data[:2] == (None, None): year_data = get_netcdf_variables( nc, @@ -359,7 +371,15 @@ def request_parse_workflow( silent=silent, ) # Cache the individual year's data - handle_caching(hash_params, cache_dir, data=year_data) + handle_caching( + hash_params, + cache_dir, + cache_content={ + "data": year_data, + "metadata": None, + "write_json": None, + }, + ) else: year_data = year_data[0] multiyear_data[year] = year_data["data"] diff --git a/mhkit/wave/io/hindcast/hindcast.py b/mhkit/wave/io/hindcast/hindcast.py index 
4bcc4486e..c58e55c40 100644 --- a/mhkit/wave/io/hindcast/hindcast.py +++ b/mhkit/wave/io/hindcast/hindcast.py @@ -192,7 +192,11 @@ def request_wpto_point_data( # Construct a string representation of the function parameters hash_params = f"{data_type}_{parameter}_{lat_lon}_{years}_{tree}_{unscale}_{str_decode}_{hsds}_{path}_{to_pandas}" cache_dir = _get_cache_dir() - data, meta, _ = handle_caching(hash_params, cache_dir) + data, meta, _ = handle_caching( + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + ) if data is not None: return data, meta @@ -277,7 +281,11 @@ def request_wpto_point_data( data = data.drop_vars("index") # save_to_cache(hash_params, data, meta) - handle_caching(hash_params, cache_dir, data, meta) + handle_caching( + hash_params, + cache_dir, + cache_content={"data": data, "metadata": meta, "write_json": None}, + ) return data, meta @@ -374,7 +382,11 @@ def request_wpto_directional_spectrum( # Attempt to load data from cache hash_params = f"{lat_lon}_{year}_{tree}_{unscale}_{str_decode}_{hsds}_{path}" cache_dir = _get_cache_dir() - data, meta, _ = handle_caching(hash_params, cache_dir) + data, meta, _ = handle_caching( + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + ) if data is not None: return data, meta @@ -480,7 +492,11 @@ def request_wpto_directional_spectrum( }, ) - handle_caching(hash_params, cache_dir, data, meta) + handle_caching( + hash_params, + cache_dir, + cache_content={"data": data, "metadata": meta, "write_json": None}, + ) return data, meta diff --git a/mhkit/wave/io/hindcast/wind_toolkit.py b/mhkit/wave/io/hindcast/wind_toolkit.py index aad65c09d..2205e2be4 100644 --- a/mhkit/wave/io/hindcast/wind_toolkit.py +++ b/mhkit/wave/io/hindcast/wind_toolkit.py @@ -417,7 +417,12 @@ def request_wtk_point_data( hash_params = f"{time_interval}_{parameter}_{lat_lon}_{years}_{preferred_region}_{tree}_{unscale}_{str_decode}_{hsds}" # Use handle_caching to manage caching. - data, meta, _ = handle_caching(hash_params, cache_dir, clear_cache_file=clear_cache) + data, meta, _ = handle_caching( + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + clear_cache_file=clear_cache, + ) if data is not None and meta is not None: if not to_pandas: @@ -478,7 +483,11 @@ def request_wtk_point_data( meta = meta.reset_index(drop=True) # Save the retrieved data and metadata to cache. 
- handle_caching(hash_params, cache_dir, data=data, metadata=meta) + handle_caching( + hash_params, + cache_dir, + cache_content={"data": data, "metadata": meta, "write_json": None}, + ) if not to_pandas: data = convert_to_dataset(data) diff --git a/mhkit/wave/io/ndbc.py b/mhkit/wave/io/ndbc.py index 12ad3e9a7..3356358cd 100644 --- a/mhkit/wave/io/ndbc.py +++ b/mhkit/wave/io/ndbc.py @@ -207,7 +207,12 @@ def available_data( cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "mhkit", "ndbc") # Check the cache before making the request - data, _, _ = handle_caching(hash_params, cache_dir, clear_cache_file=clear_cache) + data, _, _ = handle_caching( + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + clear_cache_file=clear_cache, + ) # no coverage bc in coverage runs we have already cached the data/ run this code if data is None: # pragma: no cover @@ -246,7 +251,16 @@ def available_data( data = available_data[available_data.id == buoy_number[i]] available_data = available_data.append(data) # Cache the result - handle_caching(hash_params, cache_dir, data=available_data) + handle_caching( + hash_params, + cache_dir, + cache_content={ + "data": available_data, + "metadata": None, + "write_json": None, + }, + ) + else: available_data = data @@ -371,7 +385,10 @@ def request_data(parameter, filenames, proxy=None, clear_cache=False, to_pandas= # Create a unique filename based on the function parameters for caching hash_params = f"{buoy_id}_{parameter}_{year}_{filename}" cached_data, _, _ = handle_caching( - hash_params, cache_dir, clear_cache_file=clear_cache + hash_params, + cache_dir, + cache_content={"data": None, "metadata": None, "write_json": None}, + clear_cache_file=clear_cache, ) if cached_data is not None: @@ -415,7 +432,13 @@ def request_data(parameter, filenames, proxy=None, clear_cache=False, to_pandas= # Cache the data after processing it if it exists if year in ndbc_data[buoy_id]: handle_caching( - hash_params, cache_dir, data=ndbc_data[buoy_id][year] + hash_params, + cache_dir, + cache_content={ + "data": ndbc_data[buoy_id][year], + "metadata": None, + "write_json": None, + }, ) if buoy_id and len(ndbc_data) == 1: From 88810cf957411239443c4650251276d5039592b0 Mon Sep 17 00:00:00 2001 From: ssolson Date: Thu, 5 Sep 2024 11:04:29 -0400 Subject: [PATCH 05/31] index is now "t" --- mhkit/tests/tidal/test_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mhkit/tests/tidal/test_io.py b/mhkit/tests/tidal/test_io.py index 280b847ce..5ce38e4fa 100644 --- a/mhkit/tests/tidal/test_io.py +++ b/mhkit/tests/tidal/test_io.py @@ -103,11 +103,11 @@ def test_request_noaa_data_basic_xarray(self): ) # Check if the variable sets are equal data_variables = list(data.variables) - required_variables = ["index", "s", "d", "b"] + required_variables = ["t", "s", "d", "b"] data_variables_set = set(data_variables) required_variables_set = set(required_variables) self.assertTrue(data_variables_set == required_variables_set) - self.assertEqual(len(data["index"]), 183) + self.assertEqual(len(data["t"]), 183) self.assertEqual(data.attrs["id"], "s08010") def test_request_noaa_data_write_json(self): From 71974b33a5e84927fa613a222ed22fbb06e17656 Mon Sep 17 00:00:00 2001 From: ssolson Date: Thu, 5 Sep 2024 11:08:08 -0400 Subject: [PATCH 06/31] 10/10 lint --- mhkit/utils/upcrossing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mhkit/utils/upcrossing.py b/mhkit/utils/upcrossing.py index 5993d6544..18db3f142 
100644 --- a/mhkit/utils/upcrossing.py +++ b/mhkit/utils/upcrossing.py @@ -75,15 +75,15 @@ def upcrossing(t, data): raise ValueError("only 1D data supported, try calling squeeze()") # eliminate zeros - zeroMask = data == 0 - data[zeroMask] = 0.5 * np.min(np.abs(data)) + zero_mask = data == 0 + data[zero_mask] = 0.5 * np.min(np.abs(data)) # zero up-crossings diff = np.diff(np.sign(data)) - zeroUpCrossings_mask = (diff == 2) | (diff == 1) - zeroUpCrossings_index = np.where(zeroUpCrossings_mask)[0] + zero_upcrossings_mask = (diff == 2) | (diff == 1) + zero_upcrossings_index = np.where(zero_upcrossings_mask)[0] - return zeroUpCrossings_index + return zero_upcrossings_index def peaks(t, data, inds=None): From 8d0263f2b66aad88b24ffff699be5af2c0b489c1 Mon Sep 17 00:00:00 2001 From: ssolson Date: Thu, 5 Sep 2024 11:32:45 -0400 Subject: [PATCH 07/31] 10/10 lint --- mhkit/utils/stat_utils.py | 165 ++++++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 61 deletions(-) diff --git a/mhkit/utils/stat_utils.py b/mhkit/utils/stat_utils.py index f0a7e2994..681636680 100644 --- a/mhkit/utils/stat_utils.py +++ b/mhkit/utils/stat_utils.py @@ -1,9 +1,57 @@ -from mhkit import qc +""" +This module contains functions to perform various statistical calculations +on continuous data. It includes functions for calculating statistics such as +mean, max, min, and standard deviation over specific windows, as well as functions +for vector/directional statistics. The module also provides utility functions +to unwrap vectors, compute magnitudes and phases in 2D/3D, and calculate +the root mean squared values of vector components. + +Functions: +---------- +- get_statistics: Calculates statistics for continuous data. +- vector_statistics: Calculates vector mean and standard deviation. +- unwrap_vector: Unwraps vector data to fall within a 0-360 degree range. +- magnitude_phase: Computes magnitude and phase for 2D or 3D data. +- unorm: Computes root mean squared value of 3D vectors. +""" + import pandas as pd import numpy as np +from mhkit import qc + + +def _calculate_statistics(datachunk, vector_channels): + """ + Calculate the mean, max, min, and standard deviation for the given datachunk. + Also calculate vector statistics for vector_channels. + + Parameters + ---------- + datachunk : pandas DataFrame + A chunk of data on which to perform statistics. + vector_channels : list + List of vector channel names formatted in deg (0-360). + + Returns + ------- + stats : dict + A dictionary containing 'means', 'maxs', 'mins', and 'stdevs'. + """ + means = datachunk.mean() + maxs = datachunk.max() + mins = datachunk.min() + stdevs = datachunk.std() + + for v in vector_channels: + vector_avg, vector_std = vector_statistics(datachunk[v]) + # overwrite scalar average and std for channel + means[v] = vector_avg + stdevs[v] = vector_std + return {"means": means, "maxs": maxs, "mins": mins, "stdevs": stdevs} -def get_statistics(data, freq, period=600, vector_channels=[]): + +def get_statistics(data, freq, period=600, vector_channels=None): """ Calculate mean, max, min and stdev statistics of continuous data for a given statistical window. 
Default length of statistical window (period) is @@ -26,71 +74,63 @@ def get_statistics(data, freq, period=600, vector_channels=[]): means,maxs,mins,stdevs : pandas DataFrame Calculated statistical values from the data, indexed by the first timestamp """ - # Check data type + if vector_channels is None: + vector_channels = [] + + if isinstance(vector_channels, str): + vector_channels = [vector_channels] + if not isinstance(data, pd.DataFrame): raise TypeError(f"data must be of type pd.DataFrame. Got: {type(data)}") if not isinstance(freq, (float, int)): raise TypeError(f"freq must be of type int or float. Got: {type(freq)}") if not isinstance(period, (float, int)): raise TypeError(f"period must be of type int or float. Got: {type(period)}") - # catch if vector_channels is not an string array - if isinstance(vector_channels, str): - vector_channels = [vector_channels] if not isinstance(vector_channels, list): raise TypeError( f"vector_channels must be a list of strings. Got: {type(vector_channels)}" ) - # Check timestamp using qc module data.index = data.index.round("1ms") - dataQC = qc.check_timestamp(data, 1 / freq) - dataQC = dataQC["cleaned_data"] + data_qc = qc.check_timestamp(data, 1 / freq)["cleaned_data"] - # Check to see if data length contains enough data points for statistical window - if len(dataQC) % (period * freq) > 0: - remain = len(dataQC) % (period * freq) - dataQC = dataQC.iloc[0 : -int(remain)] + if len(data_qc) % (period * freq) > 0: + remain = len(data_qc) % (period * freq) + data_qc = data_qc.iloc[0 : -int(remain)] print( - "WARNING: there were not enough data points in the last statistical period. Last " - + str(remain) - + " points were removed." + f"WARNING: there were not enough data points in the last statistical period. \ + Last {remain} points were removed." 
) - # Pre-allocate lists time = [] means = [] maxs = [] mins = [] - stdev = [] + stdevs = [] - # Get data chunks to performs stats on step = period * freq - for i in range(int(len(dataQC) / (period * freq))): - datachunk = dataQC.iloc[i * step : (i + 1) * step] - # Check whether there are any NaNs in datachunk + for i in range(int(len(data_qc) / step)): + datachunk = data_qc.iloc[i * step : (i + 1) * step] if datachunk.isnull().any().any(): print("NaNs found in statistical window...check timestamps!") input("Press to continue") continue - else: - # Get stats - time.append(datachunk.index.values[0]) # time vector - maxs.append(datachunk.max()) # maxes - mins.append(datachunk.min()) # mins - means.append(datachunk.mean()) # means - stdev.append(datachunk.std()) # standard deviation - # calculate vector averages and std - for v in vector_channels: - vector_avg, vector_std = vector_statistics(datachunk[v]) - # overwrite scalar average for channel - means[i][v] = vector_avg - stdev[i][v] = vector_std # overwrite scalar std for channel - - # Convert to DataFrames and set index + + time.append(datachunk.index.values[0]) + + # Calculate statistics for this chunk + stats = _calculate_statistics(datachunk, vector_channels) + + means.append(stats["means"]) + maxs.append(stats["maxs"]) + mins.append(stats["mins"]) + stdevs.append(stats["stdevs"]) + + # Convert lists to DataFrames means = pd.DataFrame(means, index=time) maxs = pd.DataFrame(maxs, index=time) mins = pd.DataFrame(mins, index=time) - stdevs = pd.DataFrame(stdev, index=time) + stdevs = pd.DataFrame(stdevs, index=time) return means, maxs, mins, stdevs @@ -114,22 +154,23 @@ def vector_statistics(data): """ try: data = np.array(data) - except: - pass + except (TypeError, ValueError) as e: + raise TypeError(f"Error converting data to numpy array: {e}") from e + if not isinstance(data, np.ndarray): raise TypeError(f"data must be of type np.ndarray. Got: {type(data)}") # calculate mean - Ux = sum(np.sin(data * np.pi / 180)) / len(data) - Uy = sum(np.cos(data * np.pi / 180)) / len(data) - vector_avg = 90 - np.arctan2(Uy, Ux) * 180 / np.pi + u_x = sum(np.sin(data * np.pi / 180)) / len(data) + u_y = sum(np.cos(data * np.pi / 180)) / len(data) + vector_avg = 90 - np.arctan2(u_y, u_x) * 180 / np.pi if vector_avg < 0: vector_avg = vector_avg + 360 elif vector_avg > 360: vector_avg = vector_avg - 360 # calculate standard deviation # round to 8th decimal place to reduce roundoff error - magsum = round((Ux**2 + Uy**2) * 1e8) / 1e8 + magsum = round((u_x**2 + u_y**2) * 1e8) / 1e8 epsilon = (1 - magsum) ** 0.5 if not np.isreal(epsilon): # check if epsilon is imaginary (error) vector_std = 0 @@ -157,17 +198,19 @@ def unwrap_vector(data): # Check data types try: data = np.array(data) - except: - pass + except (TypeError, ValueError) as e: + raise TypeError(f"Error converting data to numpy array: {e}") from e + if not isinstance(data, np.ndarray): raise TypeError(f"data must be of type np.ndarray. 
Got: {type(data)}") # Loop through and unwrap points - for i in range(len(data)): - if data[i] < 0: - data[i] = data[i] + 360 - elif data[i] > 360: - data[i] = data[i] - 360 + for i, value in enumerate(data): + if value < 0: + data[i] = value + 360 + elif value > 360: + data[i] = value - 360 + if max(data) > 360 or min(data) < 0: data = unwrap_vector(data) return data @@ -199,10 +242,10 @@ def magnitude_phase(x, y, z=None): x = np.array(x) y = np.array(y) - threeD = False + three_d = False if not isinstance(z, type(None)): z = np.array(z) - threeD = True + three_d = True if not isinstance(x, (float, int, np.ndarray)): raise TypeError(f"x must be of type float, int, or np.ndarray. Got: {type(x)}") @@ -213,15 +256,15 @@ def magnitude_phase(x, y, z=None): f"If specified, z must be of type float, int, or np.ndarray. Got: {type(z)}" ) - if threeD: + if three_d: mag = np.sqrt(x**2 + y**2 + z**2) theta = np.arctan2(y, x) phi = np.arctan2(np.sqrt(x**2 + y**2), z) return mag, theta, phi - else: - mag = np.sqrt(x**2 + y**2) - theta = np.arctan2(y, x) - return mag, theta + + mag = np.sqrt(x**2 + y**2) + theta = np.arctan2(y, x) + return mag, theta def unorm(x, y, z): @@ -239,7 +282,7 @@ def unorm(x, y, z): Returns ------- - unorm : array + u_norm : array The root mean squared of x, y, and z. Example @@ -265,6 +308,6 @@ def unorm(x, y, z): raise ValueError("lengths of arrays must match") xyz = np.array([x, y, z]) - unorm = np.linalg.norm(xyz, axis=0) + u_norm = np.linalg.norm(xyz, axis=0) - return unorm + return u_norm From 769a26fb8ec61155fb2bbf1b15675cdfaecb2c86 Mon Sep 17 00:00:00 2001 From: ssolson Date: Thu, 5 Sep 2024 12:04:47 -0400 Subject: [PATCH 08/31] add test__calculate_statistics --- mhkit/tests/utils/test_utils.py | 35 +++++++++++++++++++++++++++++++-- mhkit/utils/__init__.py | 1 + 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/mhkit/tests/utils/test_utils.py b/mhkit/tests/utils/test_utils.py index ba2b9cb03..192c0b226 100644 --- a/mhkit/tests/utils/test_utils.py +++ b/mhkit/tests/utils/test_utils.py @@ -29,10 +29,10 @@ def test_get_statistics(self): # load in file df = self.data["loads"] df.Timestamp = pd.to_datetime(df.Timestamp) - df.set_index("Timestamp", inplace=True) + test_df = df.set_index("Timestamp") # run function means, maxs, mins, stdevs = utils.get_statistics( - df, + test_df, self.freq, period=self.period, vector_channels=["WD_Nacelle", "WD_NacelleMod"], @@ -57,6 +57,37 @@ def test_get_statistics(self): time = pd.to_datetime(string_time) self.assertTrue(means.index[0] == time) + def test__calculate_statistics(self): + # load in file + df = self.data["loads"] + df.Timestamp = pd.to_datetime(df.Timestamp) + test_df = df.set_index("Timestamp") + + # Select a specific data chunk (the first 10 rows) + datachunk = test_df.iloc[:10] + + # Run the calculate_statistics function + stats = utils._calculate_statistics( + datachunk, vector_channels=["WD_Nacelle", "WD_NacelleMod"] + ) + + means = stats["means"] + maxs = stats["maxs"] + mins = stats["mins"] + stdevs = stats["stdevs"] + + # check statistics for a specific column ('uWind_80m') + self.assertAlmostEqual(means["uWind_80m"], 3.226, 2) # mean + self.assertAlmostEqual(maxs["uWind_80m"], 3.234, 2) # max + self.assertAlmostEqual(mins["uWind_80m"], 3.221, 2) # min + self.assertAlmostEqual(stdevs["uWind_80m"], 0.005049, 2) # standard deviation + + # check vector statistics for 'WD_Nacelle' + self.assertAlmostEqual(means["WD_Nacelle"], 157.302, 2) # vector mean + self.assertAlmostEqual( + stdevs["WD_Nacelle"], 0.000, 2 
+ ) # vector standard deviation + def test_vector_statistics(self): # load in vector variable df = self.data["loads"] diff --git a/mhkit/utils/__init__.py b/mhkit/utils/__init__.py index b484862b0..328a33200 100644 --- a/mhkit/utils/__init__.py +++ b/mhkit/utils/__init__.py @@ -6,6 +6,7 @@ from .time_utils import matlab_to_datetime, excel_to_datetime, index_to_datetime from .stat_utils import ( + _calculate_statistics, get_statistics, vector_statistics, unwrap_vector, From 2f2655b891c16e09908b9cfe3ab18838905a1630 Mon Sep 17 00:00:00 2001 From: ssolson Date: Thu, 5 Sep 2024 12:10:33 -0400 Subject: [PATCH 09/31] 10/10 lint --- mhkit/utils/time_utils.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/mhkit/utils/time_utils.py b/mhkit/utils/time_utils.py index 1f83f8ff9..2348a9916 100644 --- a/mhkit/utils/time_utils.py +++ b/mhkit/utils/time_utils.py @@ -1,7 +1,16 @@ +""" +This module provides utility functions for converting datetime formats +from MATLAB and Excel to Python datetime formats. + +Functions: +---------- +- matlab_to_datetime: Converts MATLAB datenum format to Python datetime. +- excel_to_datetime: Converts Excel datenum format to Python datetime. +""" + import datetime as dt import pandas as pd import numpy as np -from pecos.utils import index_to_datetime def matlab_to_datetime(matlab_datenum): @@ -21,10 +30,10 @@ def matlab_to_datetime(matlab_datenum): # Check data types try: matlab_datenum = np.array(matlab_datenum, ndmin=1) - except: - pass + except (TypeError, ValueError) as e: + raise TypeError(f"Error converting to numpy array: {e}") from e if not isinstance(matlab_datenum, np.ndarray): - raise TypeError(f"data must be of type np.ndarray. Got: {type(data)}") + raise TypeError(f"data must be of type np.ndarray. Got: {type(matlab_datenum)}") # Pre-allocate time = [] @@ -56,8 +65,8 @@ def excel_to_datetime(excel_num): # Check data types try: excel_num = np.array(excel_num) - except: - pass + except (TypeError, ValueError) as e: + raise TypeError(f"Error converting to numpy array: {e}") from e if not isinstance(excel_num, np.ndarray): raise TypeError(f"excel_num must be of type np.ndarray. Got: {type(excel_num)}") From e6da2ed89452821e0954eb8eb77c0dbb1e396ad5 Mon Sep 17 00:00:00 2001 From: ssolson Date: Thu, 5 Sep 2024 12:23:49 -0400 Subject: [PATCH 10/31] 10/10 pylint --- mhkit/utils/time_utils.py | 3 ++ mhkit/utils/type_handling.py | 78 +++++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/mhkit/utils/time_utils.py b/mhkit/utils/time_utils.py index 2348a9916..8e3db5875 100644 --- a/mhkit/utils/time_utils.py +++ b/mhkit/utils/time_utils.py @@ -12,6 +12,9 @@ import pandas as pd import numpy as np +# pylint: disable=unused-import +from pecos.utils import index_to_datetime + def matlab_to_datetime(matlab_datenum): """ diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py index 1b06c7d12..0f928fb8f 100644 --- a/mhkit/utils/type_handling.py +++ b/mhkit/utils/type_handling.py @@ -1,3 +1,18 @@ +""" +This module provides utility functions for converting various data types +to xarray structures such as xarray.DataArray and xarray.Dataset. It also +includes functions for handling nested dictionaries containing pandas +DataFrames by converting them to xarray Datasets. + +Functions: +---------- +- to_numeric_array: Converts input data to a numeric NumPy array. +- convert_to_dataset: Converts pandas or xarray data structures to xarray.Dataset. 
+- convert_to_dataarray: Converts various data types to xarray.DataArray. +- convert_nested_dict_and_pandas: Recursively converts pandas DataFrames + in nested dictionaries to xarray Datasets. +""" + import numpy as np import pandas as pd import xarray as xr @@ -27,8 +42,10 @@ def convert_to_dataset(data, name="data"): """ Converts the given data to an xarray.Dataset. - This function is designed to handle inputs that can be either a pandas DataFrame, a pandas Series, - an xarray DataArray, or an xarray Dataset. It ensures that the output is consistently an xarray.Dataset. + This function is designed to handle inputs that can be either a + pandas DataFrame, a pandas Series, an xarray DataArray, or an + xarray Dataset. It ensures that the output is consistently an + xarray.Dataset. Parameters ---------- @@ -36,14 +53,15 @@ def convert_to_dataset(data, name="data"): The data to be converted. name: str (Optional) - The name to assign to the data variable in case the input is an xarray DataArray without a name. + The name to assign to the data variable in case the input is an + xarray DataArray without a name. Default value is 'data'. Returns ------- xarray.Dataset - The input data converted to an xarray.Dataset. If the input is already an xarray.Dataset, - it is returned as is. + The input data converted to an xarray.Dataset. If the input is + already an xarray.Dataset, it is returned as is. Examples -------- @@ -75,7 +93,8 @@ def convert_to_dataset(data, name="data"): # Takes data that could be pd.DataFrame, pd.Series, xr.DataArray, or # xr.Dataset and converts it to xr.Dataset if isinstance(data, pd.DataFrame): - # xr.Dataset(data) is drastically faster (1e1 - 1e2x faster) than using pd.DataFrame.to_xarray() + # xr.Dataset(data) is drastically faster (1e1 - 1e2x faster) + # than using pd.DataFrame.to_xarray() data = xr.Dataset(data) if isinstance(data, pd.Series): @@ -86,7 +105,7 @@ def convert_to_dataset(data, name="data"): if isinstance(data, xr.DataArray): # xr.DataArray.to_dataset() breaks if the data variable is unnamed - if data.name == None: + if data.name is None: data.name = name data = data.to_dataset() @@ -97,18 +116,23 @@ def convert_to_dataarray(data, name="data"): """ Converts the given data to an xarray.DataArray. - This function takes in a numpy ndarray, pandas Series, pandas Dataframe, or xarray Dataset - and outputs an equivalent xarray DataArray. DataArrays can be passed through with no changes. + This function takes in a numpy ndarray, pandas Series, pandas + Dataframe, or xarray Dataset and outputs an equivalent xarray + DataArray. DataArrays can be passed through with no changes. - Xarray datasets can only be input when all variable have the same dimensions. + Xarray datasets can only be input when all variable have the same + dimensions. - Multivariate pandas Dataframes become 2D DataArrays, which is especially useful when IO - functions return Dataframes with an extremely large number of variable. Use the function - convert_to_dataset to change a multivariate Dataframe into a multivariate Dataset. + Multivariate pandas Dataframes become 2D DataArrays, which is + especially useful when IO functions return Dataframes with an + extremely large number of variable. Use the function + convert_to_dataset to change a multivariate Dataframe into a + multivariate Dataset. 
Parameters ---------- - data: numpy ndarray, pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset + data: numpy ndarray, pandas DataFrame, pandas Series, xarray + DataArray, or xarray Dataset The data to be converted. name: str (Optional) @@ -118,8 +142,8 @@ def convert_to_dataarray(data, name="data"): Returns ------- xarray.DataArray - The input data converted to an xarray.DataArray. If the input is already an xarray.DataArray, - it is returned as is. + The input data converted to an xarray.DataArray. If the input + is already an xarray.DataArray, it is returned as is. Examples -------- @@ -152,8 +176,10 @@ def convert_to_dataarray(data, name="data"): # Checks pd.DataFrame input and converts to pd.Series if possible if isinstance(data, pd.DataFrame): if data.shape[1] == 1: - # Convert the 1D, univariate case to a Series, which will be caught by the Series conversion below. - # This eliminates an unnecessary variable dimension and names the DataArray with the DataFrame variable name. + # Convert the 1D, univariate case to a Series, which will + # be caught by the Series conversion below. This eliminates + # an unnecessary variable dimension and names the DataArray + # with the DataFrame variable name. # # Use iloc instead of squeeze. For DataFrames/Series with only a # single value, squeeze returns a scalar which is unexpected. @@ -172,32 +198,36 @@ def convert_to_dataarray(data, name="data"): if isinstance(data, xr.Dataset): keys = list(data.keys()) if len(keys) == 1: - # if only one variable, remove the "variable" dimension and rename the DataArray to simplify + # if only one variable, remove the "variable" dimension and + # rename the DataArray to simplify data = data.to_array() data = data.sel(variable=keys[0]) data.name = keys[0] data.drop_vars("variable") else: # Allow multiple variables if they have the same dimensions - if all([data[keys[0]].dims == data[key].dims for key in keys]): + if all(data[keys[0]].dims == data[key].dims for key in keys): data = data.to_array() else: raise ValueError( - "Multivariate Datasets can only be input if all variables have the same dimensions." + "Multivariate Datasets can only be input if all \ + variables have the same dimensions." ) # Converts pd.Series to xr.DataArray if isinstance(data, pd.Series): data = data.to_xarray() - # Converts np.ndarray to xr.DataArray. Assigns a simple 0-based dimension named index to match how pandas converts to xarray + # Converts np.ndarray to xr.DataArray. 
Assigns a simple 0-based + # dimension named index to match how pandas converts to xarray if isinstance(data, np.ndarray): data = xr.DataArray( data=data, dims="index", coords={"index": np.arange(len(data))} ) - # If there's no data name, add one to prevent issues calling or converting to a Dataset later on - if data.name == None: + # If there's no data name, add one to prevent issues calling or + # converting to a Dataset later on + if data.name is None: data.name = name return data From 1c3602af11b960e8641e7b5f2972b93324f8c286 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 6 Sep 2024 08:58:14 -0400 Subject: [PATCH 11/31] handle cache returns None now --- mhkit/wave/io/cdip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mhkit/wave/io/cdip.py b/mhkit/wave/io/cdip.py index 92a1d47e6..020664aac 100644 --- a/mhkit/wave/io/cdip.py +++ b/mhkit/wave/io/cdip.py @@ -330,7 +330,7 @@ def request_parse_workflow( cache_content={"data": None, "metadata": None, "write_json": None}, ) - if data[:2] == (None, None): + if data is None: data = get_netcdf_variables( nc, start_date=start_date, @@ -361,7 +361,7 @@ def request_parse_workflow( cache_dir, cache_content={"data": None, "metadata": None, "write_json": None}, ) - if year_data[:2] == (None, None): + if year_data is None: year_data = get_netcdf_variables( nc, start_date=start_date, From 0e343d556d96c19868de95935b09b3fe5019dcc6 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 6 Sep 2024 10:28:20 -0400 Subject: [PATCH 12/31] fix logic around None passed to handle_cache --- mhkit/utils/cache.py | 40 ++++------------------------------------ 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/mhkit/utils/cache.py b/mhkit/utils/cache.py index de30a4e7e..053f35c5e 100644 --- a/mhkit/utils/cache.py +++ b/mhkit/utils/cache.py @@ -204,40 +204,6 @@ def handle_caching( """ Handles caching of data to avoid redundant network requests or computations. - - The function checks if a cache file exists for the given parameters. - If it does, the function will load data from the cache file, unless - the `clear_cache_file` parameter is set to `True`, in which case the - cache file is cleared. If the cache file does not exist and the - `data` parameter is not `None`, the function will store the - provided data in a cache file. - - Parameters - ---------- - hash_params : str - The parameters to be hashed and used as the filename for the cache file. - cache_dir : str - The directory where the cache files are stored. - cache_content : dict or None - Dictionary containing 'data' (pandas DataFrame or None), 'metadata' - (dict or None), and 'write_json' (str or None). If `None`, the function - will attempt to load data from the cache file. - clear_cache_file : bool - If `True`, the cache file for the given parameters will be cleared. - - Returns - ------- - data : pandas DataFrame or None - The data loaded from the cache file. If data was provided as a - parameter, the same data will be returned. If the cache file - does not exist and no data was provided, `None` will be returned. - metadata : dict or None - The metadata loaded from the cache file. If metadata was provided - as a parameter, the same metadata will be returned. If the cache - file does not exist and no metadata was provided, `None` will be - returned. - cache_filepath : str - The path to the cache file. 
""" # Initialize data and metadata to None to avoid pylint errors @@ -309,8 +275,10 @@ def _write_cache(data, metadata, file_extension, cache_filepath): # Clear cache if requested _clear_cache(cache_filepath) - # Check if cache file exists and load if no data provided - if os.path.isfile(cache_filepath) and cache_content is None: + # If cache file exists and cache_content["data"] is None, load from cache + if os.path.isfile(cache_filepath) and ( + cache_content is None or cache_content["data"] is None + ): return _load_cache(file_extension, cache_filepath) + (cache_filepath,) # Store data in cache if provided From 486708dcc629d327dee4371584d223c1da295112 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 6 Sep 2024 10:34:23 -0400 Subject: [PATCH 13/31] back to index --- mhkit/tests/tidal/test_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mhkit/tests/tidal/test_io.py b/mhkit/tests/tidal/test_io.py index 5ce38e4fa..280b847ce 100644 --- a/mhkit/tests/tidal/test_io.py +++ b/mhkit/tests/tidal/test_io.py @@ -103,11 +103,11 @@ def test_request_noaa_data_basic_xarray(self): ) # Check if the variable sets are equal data_variables = list(data.variables) - required_variables = ["t", "s", "d", "b"] + required_variables = ["index", "s", "d", "b"] data_variables_set = set(data_variables) required_variables_set = set(required_variables) self.assertTrue(data_variables_set == required_variables_set) - self.assertEqual(len(data["t"]), 183) + self.assertEqual(len(data["index"]), 183) self.assertEqual(data.attrs["id"], "s08010") def test_request_noaa_data_write_json(self): From bdf74b333addb6da4249a53ca87b265e77e6860a Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 6 Sep 2024 11:11:36 -0400 Subject: [PATCH 14/31] data no longer returned as list --- mhkit/wave/io/cdip.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mhkit/wave/io/cdip.py b/mhkit/wave/io/cdip.py index 020664aac..52322787e 100644 --- a/mhkit/wave/io/cdip.py +++ b/mhkit/wave/io/cdip.py @@ -344,8 +344,6 @@ def request_parse_workflow( cache_dir, cache_content={"data": data, "metadata": None, "write_json": None}, ) - else: - data = data[0] else: data = {"data": {}, "metadata": {}} @@ -380,8 +378,6 @@ def request_parse_workflow( "write_json": None, }, ) - else: - year_data = year_data[0] multiyear_data[year] = year_data["data"] for data_key in year_data["data"].keys(): From dfad8dc4af7e973f3341b882c51672d45d8c493e Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 6 Sep 2024 11:32:43 -0400 Subject: [PATCH 15/31] remove old cache_utils function --- mhkit/utils/cache.py | 148 ------------------------------------------- 1 file changed, 148 deletions(-) diff --git a/mhkit/utils/cache.py b/mhkit/utils/cache.py index 053f35c5e..17d2a0568 100644 --- a/mhkit/utils/cache.py +++ b/mhkit/utils/cache.py @@ -42,159 +42,11 @@ import hashlib import json import os - -# import re import shutil import pickle import pandas as pd -# def old_handle_caching( -# hash_params, -# cache_dir, -# data=None, -# metadata=None, -# write_json=None, -# clear_cache_file=False, -# ): -# """ -# Handles caching of data to avoid redundant network requests or -# computations. - -# The function checks if a cache file exists for the given parameters. -# If it does, the function will load data from the cache file, unless -# the `clear_cache_file` parameter is set to `True`, in which case the -# cache file is cleared. If the cache file does not exist and the -# `data` parameter is not `None`, the function will store the -# provided data in a cache file. 
- -# Parameters -# ---------- -# hash_params : str -# The parameters to be hashed and used as the filename for the cache file. -# cache_dir : str -# The directory where the cache files are stored. -# data : pandas DataFrame or None -# The data to be stored in the cache file. If `None`, the function -# will attempt to load data from the cache file. -# metadata : dict or None -# Metadata associated with the data. This will be stored in the -# cache file along with the data. -# write_json : str or None -# If specified, the cache file will be copied to a file with this name. -# clear_cache_file : bool -# If `True`, the cache file for the given parameters will be cleared. - -# Returns -# ------- -# data : pandas DataFrame or None -# The data loaded from the cache file. If data was provided as a -# parameter, the same data will be returned. If the cache file -# does not exist and no data was provided, `None` will be returned. -# metadata : dict or None -# The metadata loaded from the cache file. If metadata was provided -# as a parameter, the same metadata will be returned. If the cache -# file does not exist and no metadata was provided, `None` will be -# returned. -# cache_filepath : str -# The path to the cache file. -# """ - -# # Check if 'cdip' is in cache_dir, then use .pkl instead of .json -# file_extension = ( -# ".pkl" -# if "cdip" in cache_dir or "hindcast" in cache_dir or "ndbc" in cache_dir -# else ".json" -# ) - -# # Make cache directory if it doesn't exist -# if not os.path.isdir(cache_dir): -# os.makedirs(cache_dir) - -# # Create a unique filename based on the function parameters -# cache_filename = ( -# hashlib.md5(hash_params.encode("utf-8")).hexdigest() + file_extension -# ) -# cache_filepath = os.path.join(cache_dir, cache_filename) - -# # If clear_cache_file is True, remove the cache file for this request -# if clear_cache_file and os.path.isfile(cache_filepath): -# os.remove(cache_filepath) -# print(f"Cleared cache for {cache_filepath}") - -# # If a cached file exists, load and return the data from the file -# if os.path.isfile(cache_filepath) and data is None: -# if file_extension == ".json": -# with open(cache_filepath, encoding="utf-8") as f: -# json_data = json.load(f) - -# # Extract metadata if it exists -# if "metadata" in json_data: -# metadata = json_data.pop("metadata", None) - -# # Check if index is datetime formatted -# if all( -# re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", str(dt)) -# for dt in json_data["index"] -# ): -# data = pd.DataFrame( -# json_data["data"], -# index=pd.to_datetime(json_data["index"]), -# columns=json_data["columns"], -# ) -# else: -# data = pd.DataFrame( -# json_data["data"], -# index=json_data["index"], -# columns=json_data["columns"], -# ) - -# # Convert the rest to DataFrame -# data = pd.DataFrame( -# json_data["data"], -# index=pd.to_datetime(json_data["index"]), -# columns=json_data["columns"], -# ) - -# elif file_extension == ".pkl": -# with open(cache_filepath, "rb") as f: -# data, metadata = pickle.load(f) - -# if write_json: -# shutil.copy(cache_filepath, write_json) - -# return data, metadata, cache_filepath - -# # If a cached file does not exist and data is provided, -# # store the data in a cache file -# if data is not None: -# if file_extension == ".json": -# # Convert DataFrame to python dict -# py_data = data.to_dict(orient="split") -# # Add metadata to py_data -# py_data["metadata"] = metadata -# # Check if index is datetime indexed -# if isinstance(data.index, pd.DatetimeIndex): -# py_data["index"] = [ -# 
dt.strftime("%Y-%m-%d %H:%M:%S") for dt in py_data["index"] -# ] -# else: -# py_data["index"] = list(data.index) -# with open(cache_filepath, "w", encoding="utf-8") as f: -# json.dump(py_data, f) - -# elif file_extension == ".pkl": -# with open(cache_filepath, "wb") as f: -# pickle.dump((data, metadata), f) - -# if write_json: -# shutil.copy(cache_filepath, write_json) - -# return data, metadata, cache_filepath -# # If data is not provided and the cache file doesn't exist, return cache_filepath -# return None, None, cache_filepath - - def handle_caching( hash_params, cache_dir, From 8d551e4aec16b69c871adfac8f6ebaac4f2ff046 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 6 Sep 2024 11:34:40 -0400 Subject: [PATCH 16/31] clean up --- mhkit/utils/cache.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mhkit/utils/cache.py b/mhkit/utils/cache.py index 17d2a0568..706888d94 100644 --- a/mhkit/utils/cache.py +++ b/mhkit/utils/cache.py @@ -58,7 +58,6 @@ def handle_caching( computations. """ - # Initialize data and metadata to None to avoid pylint errors data = None metadata = None From cf54e12eb6149f16db6c2d71a4cfc85ae1995941 Mon Sep 17 00:00:00 2001 From: ssolson Date: Mon, 9 Sep 2024 10:02:45 -0400 Subject: [PATCH 17/31] type hints --- mhkit/utils/cache.py | 29 +++++++++++--- mhkit/utils/stat_utils.py | 33 +++++++++++++--- mhkit/utils/time_utils.py | 9 ++++- mhkit/utils/type_handling.py | 18 +++++++-- mhkit/utils/upcrossing.py | 76 +++++++++++++++++++++++++----------- 5 files changed, 124 insertions(+), 41 deletions(-) diff --git a/mhkit/utils/cache.py b/mhkit/utils/cache.py index 706888d94..3d6bc2949 100644 --- a/mhkit/utils/cache.py +++ b/mhkit/utils/cache.py @@ -39,6 +39,7 @@ Date: 2023-09-26 """ +from typing import Optional, Tuple, Dict, Any import hashlib import json import os @@ -48,14 +49,30 @@ def handle_caching( - hash_params, - cache_dir, - cache_content=None, - clear_cache_file=False, -): + hash_params: str, + cache_dir: str, + cache_content: Optional[Dict[str, Any]] = None, + clear_cache_file: bool = False, +) -> Tuple[Optional[pd.DataFrame], Optional[Dict[str, Any]], str]: """ Handles caching of data to avoid redundant network requests or computations. + + Parameters + ---------- + hash_params : str + Parameters to generate the cache file hash. + cache_dir : str + Directory where cache files are stored. + cache_content : Optional[Dict[str, Any]], optional + Content to be cached. Should contain 'data', 'metadata', and 'write_json'. + clear_cache_file : bool + Whether to clear the existing cache. + + Returns + ------- + Tuple[Optional[pd.DataFrame], Optional[Dict[str, Any]], str] + Cached data, metadata, and cache file path. """ data = None @@ -148,7 +165,7 @@ def _write_cache(data, metadata, file_extension, cache_filepath): return None, None, cache_filepath -def clear_cache(specific_dir=None): +def clear_cache(specific_dir: Optional[str] = None) -> None: """ Clears the cache. diff --git a/mhkit/utils/stat_utils.py b/mhkit/utils/stat_utils.py index 681636680..972a84f2a 100644 --- a/mhkit/utils/stat_utils.py +++ b/mhkit/utils/stat_utils.py @@ -15,12 +15,15 @@ - unorm: Computes root mean squared value of 3D vectors. 
""" +from typing import List, Dict, Optional, Tuple, Union import pandas as pd import numpy as np from mhkit import qc -def _calculate_statistics(datachunk, vector_channels): +def _calculate_statistics( + datachunk: pd.DataFrame, vector_channels: List[str] +) -> Dict[str, Union[pd.Series, float]]: """ Calculate the mean, max, min, and standard deviation for the given datachunk. Also calculate vector statistics for vector_channels. @@ -51,7 +54,12 @@ def _calculate_statistics(datachunk, vector_channels): return {"means": means, "maxs": maxs, "mins": mins, "stdevs": stdevs} -def get_statistics(data, freq, period=600, vector_channels=None): +def get_statistics( + data: pd.DataFrame, + freq: Union[float, int], + period: Union[float, int] = 600, + vector_channels: Optional[Union[str, List[str]]] = None, +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: """ Calculate mean, max, min and stdev statistics of continuous data for a given statistical window. Default length of statistical window (period) is @@ -135,7 +143,9 @@ def get_statistics(data, freq, period=600, vector_channels=None): return means, maxs, mins, stdevs -def vector_statistics(data): +def vector_statistics( + data: Union[pd.Series, np.ndarray, list] +) -> Tuple[np.ndarray, np.ndarray]: """ Function used to calculate statistics for vector/directional channels based on routine from Campbell data logger and Yamartino algorithm @@ -181,7 +191,7 @@ def vector_statistics(data): return vector_avg, vector_std -def unwrap_vector(data): +def unwrap_vector(data: Union[pd.Series, np.ndarray, list]) -> np.ndarray: """ Function used to unwrap vectors into 0-360 deg range @@ -216,7 +226,14 @@ def unwrap_vector(data): return data -def magnitude_phase(x, y, z=None): +def magnitude_phase( + x: Union[float, int, np.ndarray], + y: Union[float, int, np.ndarray], + z: Optional[Union[float, int, np.ndarray]] = None, +) -> Union[ + Tuple[Union[float, np.ndarray], Union[float, np.ndarray]], + Tuple[Union[float, np.ndarray], Union[float, np.ndarray], Union[float, np.ndarray]], +]: """ Retuns magnitude and phase in two or three dimensions. @@ -267,7 +284,11 @@ def magnitude_phase(x, y, z=None): return mag, theta -def unorm(x, y, z): +def unorm( + x: Union[np.ndarray, np.float64, pd.Series], + y: Union[np.ndarray, np.float64, pd.Series], + z: Union[np.ndarray, np.float64, pd.Series], +) -> Union[np.ndarray, np.float64]: """ Calculates the root mean squared value given three arrays. diff --git a/mhkit/utils/time_utils.py b/mhkit/utils/time_utils.py index 8e3db5875..3eb69f7e1 100644 --- a/mhkit/utils/time_utils.py +++ b/mhkit/utils/time_utils.py @@ -8,6 +8,7 @@ - excel_to_datetime: Converts Excel datenum format to Python datetime. 
""" +from typing import Union import datetime as dt import pandas as pd import numpy as np @@ -16,7 +17,9 @@ from pecos.utils import index_to_datetime -def matlab_to_datetime(matlab_datenum): +def matlab_to_datetime( + matlab_datenum: Union[np.ndarray, list, float, int] +) -> pd.DatetimeIndex: """ Convert MATLAB datenum format to Python datetime @@ -51,7 +54,9 @@ def matlab_to_datetime(matlab_datenum): return time -def excel_to_datetime(excel_num): +def excel_to_datetime( + excel_num: Union[np.ndarray, list, float, int] +) -> pd.DatetimeIndex: """ Convert Excel datenum format to Python datetime diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py index 0f928fb8f..680c2c563 100644 --- a/mhkit/utils/type_handling.py +++ b/mhkit/utils/type_handling.py @@ -13,12 +13,15 @@ in nested dictionaries to xarray Datasets. """ +from typing import Union, Dict, Any import numpy as np import pandas as pd import xarray as xr -def to_numeric_array(data, name): +def to_numeric_array( + data: Union[list, np.ndarray, pd.Series, xr.DataArray], name: str +) -> np.ndarray: """ Convert input data to a numeric array, ensuring all elements are numeric. """ @@ -38,7 +41,9 @@ def to_numeric_array(data, name): return data -def convert_to_dataset(data, name="data"): +def convert_to_dataset( + data: Union[pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset], name: str = "data" +) -> xr.Dataset: """ Converts the given data to an xarray.Dataset. @@ -112,7 +117,10 @@ def convert_to_dataset(data, name="data"): return data -def convert_to_dataarray(data, name="data"): +def convert_to_dataarray( + data: Union[np.ndarray, pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset], + name: str = "data", +) -> xr.DataArray: """ Converts the given data to an xarray.DataArray. @@ -233,7 +241,9 @@ def convert_to_dataarray(data, name="data"): return data -def convert_nested_dict_and_pandas(data): +def convert_nested_dict_and_pandas( + data: Dict[str, Union[pd.DataFrame, Dict[str, Any]]] +) -> Dict[str, Union[xr.Dataset, Dict[str, Any]]]: """ Recursively searches inside nested dictionaries for pandas DataFrames to convert to xarray Datasets. Typically called by wave.io functions that read diff --git a/mhkit/utils/upcrossing.py b/mhkit/utils/upcrossing.py index 18db3f142..1c5eea03f 100644 --- a/mhkit/utils/upcrossing.py +++ b/mhkit/utils/upcrossing.py @@ -7,21 +7,12 @@ Key Functions: -------------- - `upcrossing`: Finds the zero upcrossing points. - - `peaks`: Finds the peaks between zero crossings. - - `troughs`: Finds the troughs between zero crossings. - - `heights`: Calculates the height between zero crossings. - - `periods`: Calculates the period between zero crossings. - - `custom`: Applies a custom, user-defined function between zero crossings. - -Dependencies: -------------- -- numpy: Data analysis - + Author: ------- mbruggs @@ -34,10 +25,36 @@ """ +from typing import Callable, Optional import numpy as np -def _apply(t, data, f, inds): +def _apply( + t: np.ndarray, + data: np.ndarray, + f: Callable[[int, int], float], + inds: Optional[np.ndarray] = None, +) -> np.ndarray: + """ + Apply a function `f` over intervals defined by `inds`. If `inds` is None, + compute the indices using the upcrossing function. + + Parameters + ---------- + t : np.ndarray + Time array. + data : np.ndarray + Data array. + f : Callable[[int, int], float] + A function to apply to pairs of indices (start, end). + inds : np.ndarray, optional + Indices that define the intervals. If None, `upcrossing` is used to generate them. 
+ + Returns + ------- + np.ndarray + Array of values resulting from applying `f` over the intervals. + """ if inds is None: inds = upcrossing(t, data) @@ -50,7 +67,7 @@ def _apply(t, data, f, inds): return vals -def upcrossing(t, data): +def upcrossing(t: np.ndarray, data: np.ndarray) -> np.ndarray: """ Finds the zero upcrossing points. @@ -86,7 +103,9 @@ def upcrossing(t, data): return zero_upcrossings_index -def peaks(t, data, inds=None): +def peaks( + t: np.ndarray, data: np.ndarray, inds: Optional[np.ndarray] = None +) -> np.ndarray: """ Finds the peaks between zero crossings. @@ -96,7 +115,7 @@ def peaks(t, data, inds=None): Time array. data: np.array Signal time-series. - inds: np.array + inds : np.ndarray, optional Optional indices for the upcrossing. Useful when using several of the upcrossing methods to avoid repeating the upcrossing analysis @@ -117,7 +136,9 @@ def peaks(t, data, inds=None): return _apply(t, data, lambda ind1, ind2: np.max(data[ind1:ind2]), inds) -def troughs(t, data, inds=None): +def troughs( + t: np.ndarray, data: np.ndarray, inds: Optional[np.ndarray] = None +) -> np.ndarray: """ Finds the troughs between zero crossings. @@ -127,7 +148,7 @@ def troughs(t, data, inds=None): Time array. data: np.array Signal time-series. - inds: np.array + inds: np.array, optional Optional indices for the upcrossing. Useful when using several of the upcrossing methods to avoid repeating the upcrossing analysis @@ -148,7 +169,9 @@ def troughs(t, data, inds=None): return _apply(t, data, lambda ind1, ind2: np.min(data[ind1:ind2]), inds) -def heights(t, data, inds=None): +def heights( + t: np.ndarray, data: np.ndarray, inds: Optional[np.ndarray] = None +) -> np.ndarray: """ Calculates the height between zero crossings. @@ -161,7 +184,7 @@ def heights(t, data, inds=None): Time array. data: np.array Signal time-series. - inds: np.array + inds: np.array, optional Optional indices for the upcrossing. Useful when using several of the upcrossing methods to avoid repeating the upcrossing analysis @@ -184,7 +207,9 @@ def func(ind1, ind2): return _apply(t, data, func, inds) -def periods(t, data, inds=None): +def periods( + t: np.ndarray, data: np.ndarray, inds: Optional[np.ndarray] = None +) -> np.ndarray: """ Calculates the period between zero crossings. @@ -194,7 +219,7 @@ def periods(t, data, inds=None): Time array. data: np.array Signal time-series. - inds: np.array + inds: np.array, optional Optional indices for the upcrossing. Useful when using several of the upcrossing methods to avoid repeating the upcrossing analysis @@ -214,7 +239,12 @@ def periods(t, data, inds=None): return _apply(t, data, lambda ind1, ind2: t[ind2] - t[ind1], inds) -def custom(t, data, func, inds=None): +def custom( + t: np.ndarray, + data: np.ndarray, + func: Callable[[int, int], np.ndarray], + inds: Optional[np.ndarray] = None, +) -> np.ndarray: """ Applies a custom function to the timeseries data between upcrossing points. @@ -224,11 +254,11 @@ def custom(t, data, func, inds=None): Time array. data: np.array Signal time-series. - func: f(ind1, ind2) -> np.array + func: Callable[[int, int], np.ndarray] Function to apply between the zero crossing periods given t[ind1], t[ind2], where ind1 < ind2, correspond to the start and end of an upcrossing section. - inds: np.array + inds: np.array, optional Optional indices for the upcrossing. 
Useful when using several of the upcrossing methods to avoid repeating the upcrossing analysis From 44416f5ba30901eeff7c740d091ddcf40be38b71 Mon Sep 17 00:00:00 2001 From: ssolson Date: Tue, 12 Nov 2024 09:10:47 -0500 Subject: [PATCH 18/31] fix pylint iussues --- mhkit/utils/type_handling.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mhkit/utils/type_handling.py b/mhkit/utils/type_handling.py index 046cd07d0..09ad5ccac 100644 --- a/mhkit/utils/type_handling.py +++ b/mhkit/utils/type_handling.py @@ -195,12 +195,11 @@ def convert_to_dataarray( data = data.iloc[:, 0] else: # With this conversion, dataframe columns always become "dim_1". - # Rename to "variable" to match how multiple Dataset variables get converted into a DataArray dimension + # Rename to "variable" to match how multiple Dataset variables get + # converted into a DataArray dimension data = xr.DataArray(data) if data.dims[1] == "dim_1": - # Slight chance there is already a name for the columns data = data.rename({"dim_1": "variable"}) - # Checks xr.Dataset input and converts to xr.DataArray if possible if isinstance(data, xr.Dataset): keys = list(data.keys()) @@ -209,10 +208,10 @@ def convert_to_dataarray( data = data[keys[0]] else: # Allow multiple variables if they have the same dimensions - if all([data[keys[0]].dims == data[key].dims for key in keys]): - data = ( - data.to_array().T - ) # transpose so that the new "variable dimension" is the last dimension (matches DataFrame to DataArray behavior) + # transpose so that the new "variable dimension" is the last + # dimension (matches DataFrame to DataArray behavior) + if all(data[keys[0]].dims == data[key].dims for key in keys): + data = data.to_array().T else: raise ValueError( "Multivariate Datasets can only be input if all \ @@ -232,8 +231,7 @@ def convert_to_dataarray( # If there's no data name, add one to prevent issues calling or # converting to a Dataset later on - if data.name is None: - data.name = name + data.name = data.name if data.name is not None else name return data From ef04cc2488c87d4d1010254df337e5a9da8be7f2 Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 13 Nov 2024 07:48:56 -0500 Subject: [PATCH 19/31] clean up package installation --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 44d8ac03a..5bfc4bb95 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -156,14 +156,13 @@ jobs: - name: Setup Conda environment shell: bash -l {0} run: | - conda install numpy cython pip pytest hdf5 libnetcdf cftime netcdf4 coverage --strict-channel-priority + conda install numpy cython pip pytest hdf5 libnetcdf cftime netcdf4 coverage coveralls --strict-channel-priority pip install -e . --no-deps --force-reinstall - name: Install dependencies shell: bash -l {0} run: | python -m pip install --upgrade pip wheel - pip install coverage pytest coveralls . 
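For context on the convert_to_dataarray branches reorganised above, a brief sketch of the DataFrame path (the two-column frame and the variable name are made up for illustration):

import pandas as pd

from mhkit.utils.type_handling import convert_to_dataarray

# Made-up two-column DataFrame
df = pd.DataFrame({"Hs": [1.0, 1.5, 2.0], "Tp": [8.0, 9.0, 10.0]})

# Multi-column frames keep every column and expose them through a
# "variable" dimension, mirroring the multivariate Dataset behaviour.
da = convert_to_dataarray(df, name="sea_state")
print(da.dims)  # expected: ('dim_0', 'variable')
print(da.name)  # expected: 'sea_state'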
- name: Prepare Wind Hindcast data shell: bash -l {0} From 7064645e141316508003c448782f98c0ba3dfdbd Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 13 Nov 2024 08:08:18 -0500 Subject: [PATCH 20/31] change env name to mhkit-env --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 741f80ffc..b8607ae17 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: myenv +name: mhkit-env channels: - conda-forge - defaults From 7aaedea7b304b3b5cedc6582b061f6e64eb44e84 Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 13 Nov 2024 08:08:38 -0500 Subject: [PATCH 21/31] clean up installation --- .github/workflows/main.yml | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5bfc4bb95..29fd5762a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -153,20 +153,28 @@ jobs: python-version: ${{ env.PYTHON_VER }} use-only-tar-bz2: true - - name: Setup Conda environment + - name: Create MHKiT Conda environment shell: bash -l {0} run: | - conda install numpy cython pip pytest hdf5 libnetcdf cftime netcdf4 coverage coveralls --strict-channel-priority - pip install -e . --no-deps --force-reinstall + conda env create -f environment.yml + conda activate mhkit-env - - name: Install dependencies + - name: Install testing dependencies shell: bash -l {0} run: | - python -m pip install --upgrade pip wheel + conda activate mhkit-env + conda install -y pytest coverage coveralls + + - name: Install mhkit + shell: bash -l {0} + run: | + conda activate mhkit-env + pip install -e . --no-deps - name: Prepare Wind Hindcast data shell: bash -l {0} run: | + conda activate mhkit-env pytest mhkit/tests/wave/io/hindcast/test_wind_toolkit.py - name: Upload Wind Hindcast data as artifact From 9e0d63dcfaecdbdbbaef9cab843b34ad5a6ff489 Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 13 Nov 2024 08:25:37 -0500 Subject: [PATCH 22/31] add cf-staging label --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index b8607ae17..b360b14ee 100644 --- a/environment.yml +++ b/environment.yml @@ -1,5 +1,6 @@ name: mhkit-env channels: + - conda-forge/label/cf-staging - conda-forge - defaults dependencies: From 03c95528a2021af7f4b37c3bbde131c118afdfba Mon Sep 17 00:00:00 2001 From: ssolson Date: Wed, 13 Nov 2024 09:12:29 -0500 Subject: [PATCH 23/31] Use conda env file in all tests --- .github/workflows/main.yml | 105 +++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 34 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 29fd5762a..ccf3cd30f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,21 +67,28 @@ jobs: activate-environment: TESTconda use-only-tar-bz2: true - - name: Setup Conda environment + - name: Create MHKiT Conda environment shell: bash -l {0} run: | - conda install numpy cython pip hdf5 libnetcdf cftime netcdf4 --strict-channel-priority - pip install -e . --force-reinstall + conda env create -f environment.yml + conda activate mhkit-env - - name: Install dependencies + - name: Install testing dependencies shell: bash -l {0} run: | - python -m pip install --upgrade pip wheel - pip install coverage pytest coveralls . + conda activate mhkit-env + conda install -y pytest coverage coveralls + + - name: Install mhkit + shell: bash -l {0} + run: | + conda activate mhkit-env + pip install -e . 
--no-deps - name: Prepare non-hindcast API data shell: bash -l {0} run: | + conda activate mhkit-env pytest mhkit/tests/river/test_io_usgs.py pytest mhkit/tests/tidal/test_io.py pytest mhkit/tests/wave/io/test_cdip.py @@ -111,21 +118,28 @@ jobs: python-version: ${{ env.PYTHON_VER }} use-only-tar-bz2: true - - name: Setup Conda environment + - name: Create MHKiT Conda environment shell: bash -l {0} run: | - conda install numpy cython pip pytest hdf5 libnetcdf cftime netcdf4 coverage --strict-channel-priority - pip install -e . --force-reinstall + conda env create -f environment.yml + conda activate mhkit-env - - name: Install dependencies + - name: Install testing dependencies shell: bash -l {0} run: | - python -m pip install --upgrade pip wheel - pip install coverage pytest coveralls . + conda activate mhkit-env + conda install -y pytest coverage coveralls + + - name: Install mhkit + shell: bash -l {0} + run: | + conda activate mhkit-env + pip install -e . --no-deps - name: Prepare Wave Hindcast data shell: bash -l {0} run: | + conda activate mhkit-env pytest mhkit/tests/wave/io/hindcast/test_hindcast.py - name: Upload Wave Hindcast data as artifact @@ -208,21 +222,28 @@ jobs: python-version: ${{ matrix.python-version }} use-only-tar-bz2: false - - name: Create and setup Conda environment + - name: Create MHKiT Conda environment shell: bash -l {0} run: | - conda install -c conda-forge pytest coverage=7.5.0 coveralls --strict-channel-priority - pip install -e . --force-reinstall + conda env create -f environment.yml + conda activate mhkit-env - - name: Download data from artifact - uses: actions/download-artifact@v4 - with: - name: data - path: ~/.cache/mhkit + - name: Install testing dependencies + shell: bash -l {0} + run: | + conda activate mhkit-env + conda install -y pytest coverage coveralls + + - name: Install mhkit + shell: bash -l {0} + run: | + conda activate mhkit-env + pip install -e . --no-deps - name: Run pytest & generate coverage report shell: bash -l {0} run: | + conda activate mhkit-env coverage run --rcfile=.github/workflows/.coveragerc --source=./mhkit/ -m pytest -c .github/workflows/pytest.ini coverage lcov @@ -317,11 +338,23 @@ jobs: python-version: ${{ matrix.python-version }} use-only-tar-bz2: false - - name: Setup Conda environment + - name: Create MHKiT Conda environment + shell: bash -l {0} + run: | + conda env create -f environment.yml + conda activate mhkit-env + + - name: Install testing dependencies shell: bash -l {0} run: | - conda install -c conda-forge pytest coverage=7.5.0 coveralls --strict-channel-priority - pip install -e . --force-reinstall + conda activate mhkit-env + conda install -y pytest coverage coveralls + + - name: Install mhkit + shell: bash -l {0} + run: | + conda activate mhkit-env + pip install -e . --no-deps - name: Download Wave Hindcast data from artifact uses: actions/download-artifact@v4 @@ -342,9 +375,10 @@ jobs: mv ~/.cache/mhkit/wind-hindcast/hindcast/* ~/.cache/mhkit/hindcast/ shell: bash - - name: Install MHKiT and run pytest + - name: Run hindcast pytest shell: bash -l {0} run: | + conda activate mhkit-env coverage run --rcfile=.github/workflows/.coveragehindcastrc -m pytest -c .github/workflows/pytest-hindcast.ini coverage lcov @@ -425,21 +459,23 @@ jobs: activate-environment: TESTconda use-only-tar-bz2: true - - name: Install dependencies + - name: Create MHKiT Conda environment shell: bash -l {0} run: | - conda install numpy cython pip hdf5 libnetcdf cftime netcdf4 --strict-channel-priority - pip install -e . 
--force-reinstall - python -m pip install --upgrade pip wheel - pip install nbval jupyter - pip install utm folium + conda env create -f environment.yml + conda activate mhkit-env - - name: Ensure Conda environment is activated + - name: Install notebook testing dependencies shell: bash -l {0} run: | - echo "source ~/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc - echo "conda activate TESTconda" >> ~/.bashrc - source ~/.bashrc + conda activate mhkit-env + conda install -y pytest coverage coveralls nbval jupyter utm folium + + - name: Install mhkit + shell: bash -l {0} + run: | + conda activate mhkit-env + pip install -e . --no-deps - name: Download non-hindcast data uses: actions/download-artifact@v4 @@ -477,6 +513,7 @@ jobs: - name: Run notebook shell: bash -l {0} run: | + conda activate mhkit-env if [[ "${{ matrix.notebook }}" == "examples/metocean_example.ipynb" || "${{ matrix.notebook }}" == "examples/WPTO_hindcast_example.ipynb" ]]; then if [[ "${{ needs.check-changes.outputs.should-run-hindcast }}" == 'true' ]]; then jupyter nbconvert --to notebook --execute --inplace --ExecutePreprocessor.timeout=${{ matrix.timeout }} "${{ matrix.notebook }}" From ddfd14fa6eccf4e55952194824a881fb4d6b5986 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 08:59:51 -0500 Subject: [PATCH 24/31] add configs and debug --- .github/workflows/main.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ccf3cd30f..2bf611a6f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,10 +67,16 @@ jobs: activate-environment: TESTconda use-only-tar-bz2: true + - name: Configure conda channels + run: | + conda config --add channels conda-forge/label/cf-staging + conda config --add channels conda-forge + conda config --add channels defaults + - name: Create MHKiT Conda environment shell: bash -l {0} run: | - conda env create -f environment.yml + conda env create -f environment.yml --debug conda activate mhkit-env - name: Install testing dependencies From 9f5e4276b3032eada5da969d2f1f4f11f492d8c7 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 09:14:42 -0500 Subject: [PATCH 25/31] use legacy solver --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2bf611a6f..a144d6acf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,6 +67,9 @@ jobs: activate-environment: TESTconda use-only-tar-bz2: true + - name: Use legacy solver + run: conda config --set solver classic + - name: Configure conda channels run: | conda config --add channels conda-forge/label/cf-staging From e5f1b5c5b0a955bba7c989b997b14f68fe7c721b Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 09:30:00 -0500 Subject: [PATCH 26/31] Ensure compatibility with modern packages --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a144d6acf..83d06789f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -65,7 +65,7 @@ jobs: auto-update-conda: true python-version: ${{ env.PYTHON_VER }} activate-environment: TESTconda - use-only-tar-bz2: true + use-only-tar-bz2: false - name: Use legacy solver run: conda config --set solver classic From fd9646b073c8131ec6e1c5c19c08231ad0bca49f Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 09:35:18 -0500 Subject: [PATCH 27/31] Ensure 
compatibility with modern packages --- .github/workflows/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 83d06789f..4b6442cfd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -125,7 +125,7 @@ jobs: auto-update-conda: true activate-environment: TEST python-version: ${{ env.PYTHON_VER }} - use-only-tar-bz2: true + use-only-tar-bz2: false - name: Create MHKiT Conda environment shell: bash -l {0} @@ -174,7 +174,7 @@ jobs: auto-update-conda: true activate-environment: TEST python-version: ${{ env.PYTHON_VER }} - use-only-tar-bz2: true + use-only-tar-bz2: false - name: Create MHKiT Conda environment shell: bash -l {0} @@ -466,7 +466,7 @@ jobs: auto-update-conda: true python-version: '3.11' activate-environment: TESTconda - use-only-tar-bz2: true + use-only-tar-bz2: false - name: Create MHKiT Conda environment shell: bash -l {0} From b721f03db952e829c94a8c85c9aa8ea301a27fc9 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 10:25:59 -0500 Subject: [PATCH 28/31] add pecos --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index b360b14ee..a67d4ef02 100644 --- a/environment.yml +++ b/environment.yml @@ -19,6 +19,7 @@ dependencies: - numexpr>=2.10.0 - lxml - bottleneck + - pecos - pip: - netCDF4>=1.7.1.post1 - matplotlib>=3.9.1 From e7fc5843f99ddf66de102e9888cfea459cbd88c5 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 10:45:37 -0500 Subject: [PATCH 29/31] netcdf4 from pip to conda --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index a67d4ef02..27bd1cce2 100644 --- a/environment.yml +++ b/environment.yml @@ -20,8 +20,8 @@ dependencies: - lxml - bottleneck - pecos + - netCDF4>=1.7.2 - pip: - - netCDF4>=1.7.1.post1 - matplotlib>=3.9.1 - pecos>=0.3.0 - fatpack From 24fa8a577e0e9a820fd33a5cae80f6add9f1c1b5 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 11:45:08 -0500 Subject: [PATCH 30/31] py 3.11, relax hdf5& netCDF4 --- environment.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/environment.yml b/environment.yml index 27bd1cce2..f849ff3ac 100644 --- a/environment.yml +++ b/environment.yml @@ -1,10 +1,9 @@ name: mhkit-env channels: - - conda-forge/label/cf-staging - conda-forge - defaults dependencies: - - python>=3.10 + - python=3.11 - pip - numpy>=2.0.0 - pandas>=2.2.2 @@ -13,17 +12,17 @@ dependencies: - scikit-learn>=1.5.1 - h5py>=3.11.0 - h5pyd>=0.18.0 + - netCDF4>=1.6.5 + - hdf5>=1.14.3,<1.14.5.0a0 - statsmodels>=0.14.2 - requests - beautifulsoup4 - numexpr>=2.10.0 - lxml - bottleneck - - pecos - - netCDF4>=1.7.2 + - pecos>=0.3.0 - pip: - matplotlib>=3.9.1 - - pecos>=0.3.0 - fatpack - NREL-rex>=0.2.63 - notebook From 259e7e58531f704558b7dabcfbcb27894b1163e3 Mon Sep 17 00:00:00 2001 From: ssolson Date: Fri, 15 Nov 2024 11:58:24 -0500 Subject: [PATCH 31/31] relax python constraints --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index f849ff3ac..81cdaf613 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python=3.11 + - python>=3.10 - pip - numpy>=2.0.0 - pandas>=2.2.2
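As a closing sanity check, one way to confirm that an activated mhkit-env satisfies the floors pinned in environment.yml; the list below is intentionally abbreviated and simply mirrors a few of the constraints shown in the hunks above.

from importlib.metadata import version

# A few of the minimum versions pinned in environment.yml (abbreviated list)
floors = {
    "numpy": "2.0.0",
    "pandas": "2.2.2",
    "scikit-learn": "1.5.1",
    "matplotlib": "3.9.1",
}

for package, floor in floors.items():
    print(f"{package}: installed {version(package)}, environment.yml requires >= {floor}")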