From 17e84a42b753c46fba1326709856e0ef5f061434 Mon Sep 17 00:00:00 2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Fri, 17 Jan 2025 13:43:57 -0600 Subject: [PATCH] User Guide on Custom Grids & `.from_xarray()` methods (#1086) * docs/user-guide/custom-grid.ipynb * add notebook * update notebook * update api ref * docstrings * update docstrings * gcam * o Minor notebook fixes --------- Co-authored-by: Aaron Zedwick <95507181+aaronzedwick@users.noreply.github.com> Co-authored-by: Rajeev Jain --- docs/api.rst | 2 + docs/user-guide/custom-grid.ipynb | 355 ++++++++++++++++++++++++++++++ docs/userguide.rst | 3 + uxarray/core/dataarray.py | 28 +++ uxarray/core/dataset.py | 38 +++- 5 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 docs/user-guide/custom-grid.ipynb diff --git a/docs/api.rst b/docs/api.rst index c066dac8c..65e7abf5a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -195,6 +195,7 @@ I/O & Conversion UxDataArray.to_geodataframe UxDataArray.to_polycollection UxDataArray.to_dataset + UxDataArray.from_xarray UxDataset ----------- @@ -222,6 +223,7 @@ I/O & Conversion :toctree: generated/ UxDataset.from_structured + UxDataset.from_xarray Plotting -------- diff --git a/docs/user-guide/custom-grid.ipynb b/docs/user-guide/custom-grid.ipynb new file mode 100644 index 000000000..968b041c9 --- /dev/null +++ b/docs/user-guide/custom-grid.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a8e2583ad32cdf8", + "metadata": {}, + "source": [ + "# Custom Grid Topology\n", + "\n", + "This notebook will showcase how to construct UXarray data structures from custom grid topology information, including how to convert existing Xarray data structures to UXarray." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57e64b0ff76581ca", + "metadata": {}, + "outputs": [], + "source": [ + "import uxarray as ux\n", + "import xarray as xr\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "4cd183a0-3658-4c68-9e08-ed4ff063a0ea", + "metadata": {}, + "source": [ + "## Minimal Grid Definition\n", + "\n", + "The UGRID conventions require a minimal set of variables for representing a 2D unstructured grid. \n", + "\n", + "| **Variable** | **Shape** | **Description** |\n", + "|--------------------------|--------------------------------|------------------------------------------------|\n", + "| `node_lon` | `(n_node, )` | Longitude of the nodes that make up each face. |\n", + "| `node_lat` | `(n_node, )` | Latitude of the nodes that make up each face. |\n", + "| `face_node_connectivity` | `(n_face, n_max_face_nodes])` | Indices of the nodes that surround each face. |\n" + ] + }, + { + "cell_type": "markdown", + "id": "072075f6-6b91-4e91-9c28-67bb9ee073ef", + "metadata": {}, + "source": [ + "## Fixed Face Sizes\n", + "\n", + "For grids where each face has the same number of nodes, such as strictly triangular grids, each row in the `face_node_connectivity` array will contain the indices of nodes that surround each face, with no fill values. Below is an example of the node coordinates and connectivity required for representing a single triangle in the UGRID conventions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f608c0e2-9334-4e1e-aed4-d44b923a5566", + "metadata": {}, + "outputs": [], + "source": [ + "node_lon = np.array([-20.0, 0.0, 20.0])\n", + "node_lat = np.array([-10.0, 10.0, -10.0])\n", + "face_node_connectivity = np.array(\n", + " [\n", + " [0, 1, 2],\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f8e3b665-c225-44d6-81b1-80ad23a1b38d", + "metadata": {}, + "source": [ + "These variables can be passed directly into the `Grid.from_topology()` class-method, which allows custom grid topology information. This is especially useful for cases where a specific grid format isn't directly supported. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1b3fe63-28f1-4082-910f-8fa3179f1cf8", + "metadata": {}, + "outputs": [], + "source": [ + "uxgrid_tri = ux.Grid.from_topology(\n", + " node_lon=node_lon, node_lat=node_lat, face_node_connectivity=face_node_connectivity\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9846c0f-309b-4b8a-b355-98d23bfd5ce9", + "metadata": {}, + "outputs": [], + "source": [ + "uxgrid_tri.plot(title=\"Triangle\")" + ] + }, + { + "cell_type": "markdown", + "id": "f96283a6-a6b8-40ae-99e5-860686fc580d", + "metadata": {}, + "source": [ + "## Mixed Face Sizes\n", + "\n", + "For grids where each face does not have the same number of nodes, the `face_node_connectivity` array will have it's final dimension (`n_max_face_nodes`) set to the largest element shape. For example, a grid with triangles and quadrialterals will have a final dimension of 4. Any element that has less than the maximum number of nodes will be padded with a fill value. The `face_node_connectivity` array below showcases a basic grid with a single triangle and quadrilateral. Observe that the first row is set to `[0, 1, 2, -1]`, with the first three integers being the indices of the triangle corners, and the final value used as a fill value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43e13ad3-e020-464f-a14b-bbac8bed6bb5", + "metadata": {}, + "outputs": [], + "source": [ + "node_lon = np.array([-20.0, 0.0, 20.0, -20, -40])\n", + "node_lat = np.array([-10.0, 10.0, -10.0, 10, -10])\n", + "face_node_connectivity = np.array([[0, 1, 2, -1], [0, 1, 3, 4]])" + ] + }, + { + "cell_type": "markdown", + "id": "c28d3bb0-b469-41e4-a250-5b8f5f2125a9", + "metadata": {}, + "source": [ + "The `fill_value` parameter must be passed in when working with a mixed topology grid. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4669559c-5f79-47ed-a53f-6bf85b1eaea4", + "metadata": {}, + "outputs": [], + "source": [ + "uxgrid_tri_quad = ux.Grid.from_topology(\n", + " node_lon=node_lon,\n", + " node_lat=node_lat,\n", + " face_node_connectivity=face_node_connectivity,\n", + " fill_value=-1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e54be66c-d602-41c1-a966-efc4bb7a29c8", + "metadata": {}, + "outputs": [], + "source": [ + "uxgrid_tri_quad.plot(title=\"Triangle & Quad\")" + ] + }, + { + "cell_type": "markdown", + "id": "36d09bd1-87cc-4b78-8d2d-8108ef61f7a2", + "metadata": {}, + "source": [ + "## Working with Existing Xarray Structures\n", + "\n", + "The previous examples showcase how to create a `Grid` from custom topology. The follow sections will walk through how to match data to the `Grid`." + ] + }, + { + "cell_type": "markdown", + "id": "0876cfed-16a2-4532-ab7e-60754c897a5f", + "metadata": {}, + "source": [ + "### `xr.DataArray` to `ux.UxDataArray`\n", + "\n", + "Consider the previous example where we constructed the `uxgrid_tri` grid, consisting of a single triangle. Below is an example `xr.DataArray` containing four temperature values. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d7dce48-e6f4-4bba-8b25-961139fb07b5", + "metadata": {}, + "outputs": [], + "source": [ + "xrda_temp = xr.DataArray(\n", + " name=\"temp\",\n", + " data=np.array(\n", + " [\n", + " [100, 105, 108, 109],\n", + " ]\n", + " ).T,\n", + " dims=[\"time\", \"cell\"],\n", + ")\n", + "xrda_temp" + ] + }, + { + "cell_type": "markdown", + "id": "41f67fbad5c3e485", + "metadata": {}, + "source": [ + "The original dimension must be mapped to their UGRID equivalent. \n", + "\n", + "| **Data Mapping** | **UGRID Dimension Name** |\n", + "|------------------|--------------------------|\n", + "| Faces | n_face |\n", + "| Edges | n_edge |\n", + "| Nodes | n_node |\n", + "\n", + "\n", + "In the example above, the `cell` dimension corresponds to the faces of the grid, meaning we want to translate the dimension to `n_face`\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db7ea793-4f8e-4ae9-beec-1bfb0080d79f", + "metadata": {}, + "outputs": [], + "source": [ + "ugrid_dims = {\"cell\": \"n_face\"}" + ] + }, + { + "cell_type": "markdown", + "id": "da24da79813502a5", + "metadata": {}, + "source": [ + "The `UxDataArray.from_xarray()` takes in a user-defined `Grid` in addition to the original `xr.DataArray` and UGRID dimension mapping and returns a `UxDataArray`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "797993ac-0e83-437b-a950-704f31f4eae4", + "metadata": {}, + "outputs": [], + "source": [ + "uxda_temp = ux.UxDataArray.from_xarray(xrda_temp, uxgrid_tri, ugrid_dims)\n", + "uxda_temp" + ] + }, + { + "cell_type": "markdown", + "id": "67131fdf-3431-47ea-8e9c-0409300d7667", + "metadata": {}, + "source": [ + "### `xr.Dataset` to `ux.UxDataset`\n", + "\n", + "More commonly, you may have an entire `xr.Dataset` that contains data variables. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e967c9466b3cb25d", + "metadata": {}, + "outputs": [], + "source": [ + "xrda_vorticity = xr.DataArray(\n", + " name=\"vorticity\",\n", + " data=np.array(\n", + " [\n", + " [1, 2, 3, 4],\n", + " [5, 6, 7, 8],\n", + " [9, 10, 11, 12],\n", + " ]\n", + " ).T,\n", + " dims=[\"time\", \"vertex\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "86c9a24a4320d7d0", + "metadata": {}, + "source": [ + "In this example, we have a cell-centered `temp` and vertex-centered `vorticity` variable. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "906532aa-acb3-43c4-95f9-ef9d29312865", + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.Dataset(data_vars={\"temp\": xrda_temp, \"vorticity\": xrda_vorticity})\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "bb01a75189469712", + "metadata": {}, + "source": [ + "We must now include an additional entry into our `ugrid_dims` dictionary for our vertex-centered data variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a266b291-c189-40a3-955b-d64dc937d5c9", + "metadata": {}, + "outputs": [], + "source": [ + "ugrid_dims = {\"cell\": \"n_face\", \"vertex\": \"n_node\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a9b3bf5-38bb-4e9a-8890-4d540b9f62b7", + "metadata": {}, + "outputs": [], + "source": [ + "uxds = ux.UxDataset.from_xarray(ds=ds, uxgrid=uxgrid_tri, ugrid_dims=ugrid_dims)" + ] + }, + { + "cell_type": "markdown", + "id": "7d0b1326", + "metadata": {}, + "source": [ + "uxds now has two `UxDataArray` objects, one for the cell-centered data `temp` and one for the vertex-centered data `vorticity`. There are 4 time steps 0, 1, 2, and 3. Values for time step 0 are shown below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "992a409a", + "metadata": {}, + "outputs": [], + "source": [ + "uxds[\"vorticity\"][0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "uxarray_env3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/userguide.rst b/docs/userguide.rst index b62cce86c..5ebf96de9 100644 --- a/docs/userguide.rst +++ b/docs/userguide.rst @@ -31,6 +31,9 @@ These user guides provide detailed explanations of the core functionality in UXa `Data Structures `_ Core data structures for working with unstructured grid and data files +`Custom Grid Topology `_ + Create a Grid from custom Grid topology and convert existing Xarray data structures to UXarray. + `Loading Data using Dask `_ Read data with chunking and/or in parallel diff --git a/uxarray/core/dataarray.py b/uxarray/core/dataarray.py index 5c07a70a5..c2244f934 100644 --- a/uxarray/core/dataarray.py +++ b/uxarray/core/dataarray.py @@ -35,6 +35,7 @@ from uxarray.remap import UxDataArrayRemapAccessor from uxarray.cross_sections import UxDataArrayCrossSectionAccessor from uxarray.core.aggregation import _uxda_grid_aggregate +from uxarray.core.utils import _map_dims_to_ugrid import warnings from warnings import warn @@ -1158,6 +1159,33 @@ def isel(self, ignore_grid=False, inverse_indices=False, *args, **kwargs): # original xarray implementation for non-grid dimensions return super().isel(*args, **kwargs) + @classmethod + def from_xarray(cls, da: xr.DataArray, uxgrid: Grid, ugrid_dims: dict = None): + """ + Converts a ``xarray.DataArray`` into a ``uxarray.UxDataset`` paired with a user-defined ``Grid`` + + Parameters + ---------- + da : xr.DataArray + An Xarray data array containing data residing on an unstructured grid + uxgrid : Grid + ``Grid`` object representing an unstructured grid + ugrid_dims : dict, optional + A dictionary mapping data array dimensions to UGRID dimensions. + + Returns + ------- + cls + A ``ux.UxDataArray`` with data from the ``xr.DataArray` paired with a ``ux.Grid`` + """ + if ugrid_dims is None: + ugrid_dims = uxgrid._source_dims_dict + + # map each dimension to its UGRID equivalent + ds = _map_dims_to_ugrid(da, ugrid_dims, uxgrid) + + return cls(ds, uxgrid=uxgrid) + def _slice_from_grid(self, sliced_grid): """Slices a ``UxDataArray`` from a sliced ``Grid``, using cached indices to correctly slice the data variable.""" diff --git a/uxarray/core/dataset.py b/uxarray/core/dataset.py index 84dd97dd5..80e2f8027 100644 --- a/uxarray/core/dataset.py +++ b/uxarray/core/dataset.py @@ -220,7 +220,7 @@ def from_dict(cls, data, **kwargs): ) @classmethod - def from_structured(cls, ds: xr.Dataset, tol: Optional[float] = 1e-10): + def from_structured(cls, ds: xr.Dataset): """Converts a structured ``xarray.Dataset`` into an unstructured ``uxarray.UxDataset`` Parameters @@ -252,6 +252,42 @@ def from_structured(cls, ds: xr.Dataset, tol: Optional[float] = 1e-10): return cls(ds, uxgrid=uxgrid) + @classmethod + def from_xarray(cls, ds: xr.Dataset, uxgrid: Grid = None, ugrid_dims: dict = None): + """ + Converts a ``xarray.Dataset`` into a ``uxarray.UxDataset``, paired with either a user-defined or + parsed ``Grid`` + + Parameters + ---------- + ds : xr.Dataset + An Xarray dataset containing data residing on an unstructured grid + uxgrid : Grid, optional + ``Grid`` object representing an unstructured grid. If a grid is not provided, the source ds will be + parsed to see if a ``Grid`` can be constructed. + ugrid_dims : dict, optional + A dictionary mapping dataset dimensions to UGRID dimensions. + + Returns + ------- + cls + A ``ux.UxDataset`` with data from the ``xr.Dataset` paired with a ``ux.Grid`` + """ + if uxgrid is not None: + if ugrid_dims is None and uxgrid._source_dims_dict is not None: + ugrid_dims = uxgrid._source_dims_dict + pass + # Grid is provided, + else: + # parse + uxgrid = Grid.from_dataset(ds) + ugrid_dims = uxgrid._source_dims_dict + + # map each dimension to its UGRID equivalent + ds = _map_dims_to_ugrid(ds, ugrid_dims, uxgrid) + + return cls(ds, uxgrid=uxgrid) + def info(self, buf: IO = None, show_attrs=False) -> None: """Concise summary of Dataset variables and attributes including grid topology information stored in the ``uxgrid`` property.