pydata · TomNicholas · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
diff --git a/doc/internals/chunked-arrays.rst b/doc/internals/chunked-arrays.rst
@@ -7,13 +7,13 @@ Alternative chunked array types
 
 .. warning::
 
-    This is a *highly* experimental feature. Please report any bugs or other difficulties on `xarray's issue tracker <https://github.com/pydata/xarray/issues>`_.
+    This is an experimental feature. Please report any bugs or other difficulties on `xarray's issue tracker <https://github.com/pydata/xarray/issues>`_.
     In particular see discussion on `xarray issue #6807 <https://github.com/pydata/xarray/issues/6807>`_
 
-Xarray can wrap chunked dask arrays (see :ref:`dask`), but can also wrap any other chunked array type that exposes the correct interface.
+Xarray can wrap chunked dask arrays (see :ref:`dask`), but can also wrap any other chunked array type which exposes the correct interface.
 This allows us to support using other frameworks for distributed and out-of-core processing, with user code still written as xarray commands.
 In particular xarray also supports wrapping :py:class:`cubed.Array` objects
-(see `Cubed's documentation <https://tom-e-white.com/cubed/>`_ and the `cubed-xarray package <https://github.com/xarray-contrib/cubed-xarray>`_).
+(see `Cubed's documentation <https://tom-e-white.com/cubed/>`_) via the `cubed-xarray package <https://github.com/xarray-contrib/cubed-xarray>`_.
 
 The basic idea is that by wrapping an array that has an explicit notion of ``.chunks``, xarray can expose control over
 the choice of chunking scheme to users via methods like :py:meth:`DataArray.chunk` whilst the wrapped array actually
@@ -25,11 +25,12 @@ Chunked array methods and "core operations"
 A chunked array needs to meet all the :ref:`requirements for normal duck arrays <internals.duckarrays.requirements>`, but must also
 implement additional features.
 
-Chunked arrays have additional attributes and methods, such as ``.chunks`` and ``.rechunk``.
-Furthermore, Xarray dispatches chunk-aware computations across one or more chunked arrays using special functions known
-as "core operations". Examples include ``map_blocks``, ``blockwise``, and ``apply_gufunc``.
+Chunked arrays will have additional attributes and methods, such as ``.chunks`` and ``.rechunk``.
+If the wrapped class only implements these additional methods then xarray will handle them in the same way it handles other duck arrays - i.e. with no further action on the user's part.
+
+However, to support applying computations across chunks, Xarray dispatches all chunk-aware computations across one or more chunked arrays using special functions known
+as "core operations". The core operations are generalizations of functions first implemented in :py:mod:`dask.array`, and examples include ``map_blocks``, ``blockwise``, and ``apply_gufunc``.
 
-The core operations are generalizations of functions first implemented in :py:mod:`dask.array`.
 The implementation of these functions is specific to the type of arrays passed to them. For example, when applying the
 ``map_blocks`` core operation, :py:class:`dask.array.Array` objects must be processed by :py:func:`dask.array.map_blocks`,
 whereas :py:class:`cubed.Array` objects must be processed by :py:func:`cubed.map_blocks`.
@@ -100,3 +101,9 @@ To use a parallel array type that does not expose a concept of chunks explicitly
 is theoretically required. Such an array type (e.g. `Ramba <https://github.com/Python-for-HPC/ramba>`_ or
 `Arkouda <https://github.com/Bears-R-Us/arkouda>`_) could be wrapped using xarray's existing support for
 :ref:`numpy-like "duck" arrays <userguide.duckarrays>`.
+
+Chunks without parallel processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some chunked array types exist which don't support parallel processing.
+These define ``.chunks`` and possibly also ``.rechunk``, but do not require a ``ChunkManagerEntrypoint`` in order for these methods to be called by :py:meth:`DataArray.chunk`.
diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py
@@ -18,7 +18,7 @@
 from xarray.core.utils import module_available
 from xarray.core.variable import Variable
 from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array
+from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 
 HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0")
 
@@ -144,7 +144,7 @@ def bytes_to_char(arr):
     if arr.dtype.kind != "S":
         raise ValueError("argument must have a fixed-width bytes dtype")
 
-    if is_chunked_array(arr):
+    if has_chunkmanager(arr):
         chunkmanager = get_chunked_array_type(arr)
 
         return chunkmanager.map_blocks(
@@ -183,7 +183,7 @@ def char_to_bytes(arr):
         # can't make an S0 dtype
         return np.zeros(arr.shape[:-1], dtype=np.bytes_)
 
-    if is_chunked_array(arr):
+    if is_chunked_array(arr) and has_chunkmanager(arr):
         chunkmanager = get_chunked_array_type(arr)
 
         if len(arr.chunks[-1]) > 1:

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -27,8 +27,11 @@
 from xarray.core.pdcompat import nanosecond_precision_timestamp
 from xarray.core.utils import emit_user_level_warning
 from xarray.core.variable import Variable
-from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array
+from xarray.namedarray.parallelcompat import (
+    T_ChunkedArray,
+    get_chunked_array_type,
+)
+from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 from xarray.namedarray.utils import is_duck_dask_array
 
 try:
@@ -719,7 +722,7 @@ def encode_cf_datetime(
     cftime.date2num
     """
     dates = asarray(dates)
-    if is_chunked_array(dates):
+    if is_chunked_array(dates) and has_chunkmanager(dates):
         return _lazily_encode_cf_datetime(dates, units, calendar, dtype)
     else:
         return _eagerly_encode_cf_datetime(dates, units, calendar, dtype)
@@ -864,7 +867,7 @@ def encode_cf_timedelta(
     dtype: np.dtype | None = None,
 ) -> tuple[T_DuckArray, str]:
     timedeltas = asarray(timedeltas)
-    if is_chunked_array(timedeltas):
+    if is_chunked_array(timedeltas) and has_chunkmanager(timedeltas):
         return _lazily_encode_cf_timedelta(timedeltas, units, dtype)
     else:
         return _eagerly_encode_cf_timedelta(timedeltas, units, dtype)

diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py
@@ -13,7 +13,7 @@
 from xarray.core import dtypes, duck_array_ops, indexing
 from xarray.core.variable import Variable
 from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array
+from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 
 if TYPE_CHECKING:
     T_VarTuple = tuple[tuple[Hashable, ...], Any, dict, dict]
@@ -176,7 +176,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike):
     -------
     Either a dask.array.Array or _ElementwiseFunctionArray.
     """
-    if is_chunked_array(array):
+    if is_chunked_array(array) and has_chunkmanager(array):
         chunkmanager = get_chunked_array_type(array)
 
         return chunkmanager.map_blocks(func, array, dtype=dtype)  # type: ignore[arg-type]

diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -19,8 +19,11 @@
     is_scalar,
 )
 from xarray.namedarray.core import _raise_if_any_duplicate_dimensions
-from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager
-from xarray.namedarray.pycompat import is_chunked_array
+from xarray.namedarray.parallelcompat import (
+    get_chunked_array_type,
+    guess_chunkmanager,
+)
+from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 
 try:
     import cftime
@@ -1717,6 +1720,7 @@ def _full_like_variable(
 
     if (
         is_chunked_array(other.data)
+        and has_chunkmanager(other.data)
         or chunked_array_type is not None
         or chunks is not None
     ):

diff --git a/xarray/core/computation.py b/xarray/core/computation.py
@@ -26,7 +26,7 @@
 from xarray.core.utils import is_dict_like, is_duck_dask_array, is_scalar, parse_dims
 from xarray.core.variable import Variable
 from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array
+from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 from xarray.util.deprecation_helpers import deprecate_dims
 
 if TYPE_CHECKING:
@@ -2169,7 +2169,7 @@ def _calc_idxminmax(
     indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna)
 
     # Handle chunked arrays (e.g. dask).
-    if is_chunked_array(array.data):
+    if is_chunked_array(array.data) and has_chunkmanager(array.data):
         chunkmanager = get_chunked_array_type(array.data)
         chunks = dict(zip(array.dims, array.chunks))
         dask_coord = chunkmanager.from_array(array[dim].data, chunks=chunks[dim])

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -125,7 +125,7 @@
     calculate_dimensions,
 )
 from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager
-from xarray.namedarray.pycompat import array_type, is_chunked_array
+from xarray.namedarray.pycompat import array_type, has_chunkmanager, is_chunked_array
 from xarray.plot.accessor import DatasetPlotAccessor
 from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims
 
@@ -856,7 +856,9 @@ def load(self, **kwargs) -> Self:
         """
         # access .data to coerce everything to numpy or dask arrays
         lazy_data = {
-            k: v._data for k, v in self.variables.items() if is_chunked_array(v._data)
+            k: v._data
+            for k, v in self.variables.items()
+            if is_chunked_array(v._data) and has_chunkmanager(v._data)
         }
         if lazy_data:
             chunkmanager = get_chunked_array_type(*lazy_data.values())

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
@@ -39,7 +39,7 @@
 from xarray.core.utils import is_duck_array, is_duck_dask_array, module_available
 from xarray.namedarray import pycompat
 from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import array_type, is_chunked_array
+from xarray.namedarray.pycompat import array_type, has_chunkmanager, is_chunked_array
 
 # remove once numpy 2.0 is the oldest supported version
 if module_available("numpy", minversion="2.0.0.dev0"):
@@ -736,7 +736,7 @@ def first(values, axis, skipna=None):
         dtypes.isdtype(values.dtype, "signed integer") or dtypes.is_string(values.dtype)
     ):
         # only bother for dtypes that can hold NaN
-        if is_chunked_array(values):
+        if is_chunked_array(values) and has_chunkmanager(values):
             return chunked_nanfirst(values, axis)
         else:
             return nputils.nanfirst(values, axis)
@@ -749,7 +749,7 @@ def last(values, axis, skipna=None):
         dtypes.isdtype(values.dtype, "signed integer") or dtypes.is_string(values.dtype)
     ):
         # only bother for dtypes that can hold NaN
-        if is_chunked_array(values):
+        if is_chunked_array(values) and has_chunkmanager(values):
             return chunked_nanlast(values, axis)
         else:
             return nputils.nanlast(values, axis)

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
@@ -28,7 +28,12 @@
     to_0d_array,
 )
 from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import array_type, integer_types, is_chunked_array
+from xarray.namedarray.pycompat import (
+    array_type,
+    has_chunkmanager,
+    integer_types,
+    is_chunked_array,
+)
 
 if TYPE_CHECKING:
     from numpy.typing import DTypeLike
@@ -1349,7 +1354,7 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None):
     new_keys = []
     for k in key:
         if isinstance(k, np.ndarray):
-            if is_chunked_array(data):  # type: ignore[arg-type]
+            if is_chunked_array(data) and has_chunkmanager(data):  # type: ignore[arg-type]
                 chunkmanager = get_chunked_array_type(data)
                 new_keys.append(
                     _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager)

diff --git a/xarray/core/missing.py b/xarray/core/missing.py
@@ -24,7 +24,7 @@
 from xarray.core.utils import OrderedSet, is_scalar
 from xarray.core.variable import Variable, broadcast_variables
 from xarray.namedarray.parallelcompat import get_chunked_array_type
-from xarray.namedarray.pycompat import is_chunked_array
+from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 
 if TYPE_CHECKING:
     from xarray.core.dataarray import DataArray
@@ -690,7 +690,7 @@ def interp_func(var, x, new_x, method: InterpOptions, kwargs):
     else:
         func, kwargs = _get_interpolator_nd(method, **kwargs)
 
-    if is_chunked_array(var):
+    if is_chunked_array(var) and has_chunkmanager(var):
         chunkmanager = get_chunked_array_type(var)
 
         ndim = var.ndim

diff --git a/xarray/core/utils.py b/xarray/core/utils.py
@@ -1036,14 +1036,16 @@ def contains_only_chunked_or_numpy(obj) -> bool:
 
     Expects obj to be Dataset or DataArray"""
     from xarray.core.dataarray import DataArray
-    from xarray.namedarray.pycompat import is_chunked_array
+    from xarray.namedarray.pycompat import has_chunkmanager, is_chunked_array
 
     if isinstance(obj, DataArray):
         obj = obj._to_temp_dataset()
 
     return all(
         [
-            isinstance(var.data, np.ndarray) or is_chunked_array(var.data)
+            isinstance(var.data, np.ndarray)
+            or is_chunked_array(var.data)
+            and has_chunkmanager(var.data)
             for var in obj.variables.values()
         ]
     )

diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py
@@ -41,7 +41,7 @@
     _SupportsReal,
 )
 from xarray.namedarray.parallelcompat import guess_chunkmanager
-from xarray.namedarray.pycompat import to_numpy
+from xarray.namedarray.pycompat import is_chunked_array, to_numpy
 from xarray.namedarray.utils import (
     either_dict_or_kwargs,
     infix_dims,
@@ -819,11 +819,18 @@ def chunk(
                 if dim in chunks
             }
 
-        chunkmanager = guess_chunkmanager(chunked_array_type)
-
         data_old = self._data
-        if chunkmanager.is_chunked_array(data_old):
-            data_chunked = chunkmanager.rechunk(data_old, chunks)  # type: ignore[arg-type]
+        if is_chunked_array(data_old):
+            print(f"problematic chunks = {chunks}")
+            # if is_dict_like(chunks) and chunks != {}:
+            #     chunks = tuple(chunks.get(n, s) for n, s in enumerate(data_old.shape))  # type: ignore[assignment]
+
+            print(f"hopefully normalized chunks = {chunks}")
+
+            # Assume any chunked array supports .rechunk - if it doesn't then at least a clear AttributeError will be raised.
+            # Deliberately don't go through the chunkmanager so as to support chunked array types that don't need all the special computation methods.
+            # See GH issue #8733
+            data_chunked = data_old.rechunk(chunks)  # type: ignore[union-attr]
         else:
             if not isinstance(data_old, ExplicitlyIndexed):
                 ndata = data_old
@@ -841,13 +848,13 @@ def chunk(
             if is_dict_like(chunks):
                 chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape))  # type: ignore[assignment]
 
+            chunkmanager = guess_chunkmanager(chunked_array_type)
             data_chunked = chunkmanager.from_array(ndata, chunks, **from_array_kwargs)  # type: ignore[arg-type]
 
         return self._replace(data=data_chunked)
 
     def to_numpy(self) -> np.ndarray[Any, Any]:
         """Coerces wrapped data to numpy and returns a numpy.ndarray"""
-        # TODO an entrypoint so array libraries can choose coercion method?
         return to_numpy(self._data)
 
     def as_numpy(self) -> Self:

diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py
@@ -41,9 +41,6 @@ def __init__(self) -> None:
     def is_chunked_array(self, data: duckarray[Any, Any]) -> bool:
         return is_duck_dask_array(data)
 
-    def chunks(self, data: Any) -> _NormalizedChunks:
-        return data.chunks  # type: ignore[no-any-return]
-
     def normalize_chunks(
         self,
         chunks: T_Chunks | _NormalizedChunks,

diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py
@@ -218,30 +218,6 @@ def is_chunked_array(self, data: duckarray[Any, Any]) -> bool:
         """
         return isinstance(data, self.array_cls)
 
-    @abstractmethod
-    def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks:
-        """
-        Return the current chunks of the given array.
-
-        Returns chunks explicitly as a tuple of tuple of ints.
-
-        Used internally by xarray objects' .chunks and .chunksizes properties.
-
-        Parameters
-        ----------
-        data : chunked array
-
-        Returns
-        -------
-        chunks : tuple[tuple[int, ...], ...]
-
-        See Also
-        --------
-        dask.array.Array.chunks
-        cubed.Array.chunks
-        """
-        raise NotImplementedError()
-
     @abstractmethod
     def normalize_chunks(
         self,
@@ -305,37 +281,6 @@ def from_array(
         """
         raise NotImplementedError()
 
-    def rechunk(
-        self,
-        data: T_ChunkedArray,
-        chunks: _NormalizedChunks | tuple[int, ...] | _Chunks,
-        **kwargs: Any,
-    ) -> Any:
-        """
-        Changes the chunking pattern of the given array.
-
-        Called when the .chunk method is called on an xarray object that is already chunked.
-
-        Parameters
-        ----------
-        data : dask array
-            Array to be rechunked.
-        chunks :  int, tuple, dict or str, optional
-            The new block dimensions to create. -1 indicates the full size of the
-            corresponding dimension. Default is "auto" which automatically
-            determines chunk sizes.
-
-        Returns
-        -------
-        chunked array
-
-        See Also
-        --------
-        dask.array.Array.rechunk
-        cubed.Array.rechunk
-        """
-        return data.rechunk(chunks, **kwargs)
-
     @abstractmethod
     def compute(
         self, *data: T_ChunkedArray | Any, **kwargs: Any