From 1654415e302b4673e259d60206fba730d760ffdb Mon Sep 17 00:00:00 2001
From: Ashwin Srinath
Date: Fri, 10 Jan 2025 06:31:31 -0500
Subject: [PATCH] Use a numpy array rather than ctypes object

---
 .../cuda/parallel/experimental/_cccl.py            | 11 ++++-------
 .../parallel/experimental/algorithms/reduce.py     |  8 ++++++--
 .../cuda/parallel/experimental/struct.py           | 14 ++++++++------
 python/cuda_parallel/tests/test_reduce_api.py      |  7 +++++--
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/python/cuda_parallel/cuda/parallel/experimental/_cccl.py b/python/cuda_parallel/cuda/parallel/experimental/_cccl.py
index 2cd9a999066..3562b0aa33c 100644
--- a/python/cuda_parallel/cuda/parallel/experimental/_cccl.py
+++ b/python/cuda_parallel/cuda/parallel/experimental/_cccl.py
@@ -13,7 +13,7 @@
 
 from ._utils.cai import get_dtype, is_contiguous
 from .iterators._iterators import IteratorBase
-from .typing import DeviceArrayLike, GpuStruct
+from .typing import DeviceArrayLike
 
 
 # MUST match `cccl_type_enum` in c/include/cccl/c/types.h
@@ -215,10 +215,7 @@ def to_cccl_iter(array_or_iterator) -> Iterator:
     return _device_array_to_cccl_iter(array_or_iterator)
 
 
-def to_cccl_value(array_or_struct: np.ndarray | GpuStruct) -> Value:
-    info = _numpy_type_to_info(array_or_struct.dtype)
-    if isinstance(array_or_struct, np.ndarray):
-        data = ctypes.cast(array_or_struct.ctypes.data, ctypes.c_void_p)
-    else:
-        data = ctypes.cast(ctypes.pointer(array_or_struct._data), ctypes.c_void_p)
+def to_cccl_value(array: np.ndarray) -> Value:
+    info = _numpy_type_to_info(array.dtype)
+    data = ctypes.cast(array.ctypes.data, ctypes.c_void_p)
     return Value(info, data)
diff --git a/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py b/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py
index 5aa45d81a72..13b2b2fd3ad 100644
--- a/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py
+++ b/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py
@@ -74,7 +74,9 @@ def __init__(
             d_in_cccl,
             d_out_cccl,
             self.op_wrapper.handle(),
-            cccl.to_cccl_value(h_init),
+            cccl.to_cccl_value(
+                h_init if isinstance(h_init, np.ndarray) else h_init._data
+            ),
             cc_major,
             cc_minor,
             ctypes.c_char_p(cub_path),
@@ -121,7 +123,9 @@ def __call__(
             d_out_cccl,
             ctypes.c_ulonglong(num_items),
             self.op_wrapper.handle(),
-            cccl.to_cccl_value(h_init),
+            cccl.to_cccl_value(
+                h_init if isinstance(h_init, np.ndarray) else h_init._data
+            ),
             None,
         )
         if error != enums.CUDA_SUCCESS:
diff --git a/python/cuda_parallel/cuda/parallel/experimental/struct.py b/python/cuda_parallel/cuda/parallel/experimental/struct.py
index f6014d7963c..f91f16c3a7b 100644
--- a/python/cuda_parallel/cuda/parallel/experimental/struct.py
+++ b/python/cuda_parallel/cuda/parallel/experimental/struct.py
@@ -55,13 +55,15 @@ def gpu_struct(this: type) -> Type[GpuStruct]:
     # create CuPy/NumPy arrays of this type.
     setattr(this, "dtype", np.dtype(list(anns.items())))
 
-    # Define __post_init__ to create a ctypes object from the fields,
-    # and keep a reference to it in the `._data` attribute. This
-    # ctypes object is what is ultimately passed to the underlying C
-    # library and we need to make sure we keep it alive.
+    # Define __post_init__ to create a numpy struct from the fields,
+    # and keep a reference to it in the `._data` attribute. The data
+    # underlying this array is what is ultimately passed to the C
+    # library, and we need to keep a reference to it for the lifetime
+    # of the object.
     def __post_init__(self):
-        ctypes_typ = np.ctypeslib.as_ctypes_type(this.dtype)
-        self._data = ctypes_typ(*(getattr(self, name) for name in this.dtype.names))
+        self._data = np.array(
+            [tuple(getattr(self, name) for name in anns)], dtype=self.dtype
+        )
 
     setattr(this, "__post_init__", __post_init__)
 
diff --git a/python/cuda_parallel/tests/test_reduce_api.py b/python/cuda_parallel/tests/test_reduce_api.py
index 9cea1cc89fe..22d824caee9 100644
--- a/python/cuda_parallel/tests/test_reduce_api.py
+++ b/python/cuda_parallel/tests/test_reduce_api.py
@@ -181,9 +181,13 @@ def square_op(a):
 
 
 def test_reduce_struct_type():
+    # example-begin reduce-struct
+    import cupy as cp
+    import numpy as np
+
+    from cuda.parallel.experimental import algorithms
     from cuda.parallel.experimental.struct import gpu_struct
 
-    # example-begin reduce-struct
     @gpu_struct
     class Pixel:
         r: np.int32
@@ -209,4 +213,3 @@ def max_g_value(x, y):
 
     np.testing.assert_equal(expected["g"], d_out.get()["g"])
     # example-end reduce-struct
-
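
A minimal standalone sketch (illustrative only, not part of the patch; the dtype mirrors the Pixel example in the test above) of the pattern the new __post_init__ and to_cccl_value rely on: a one-element NumPy structured array holds the field values, and the address of its buffer is what the C library sees, so the array must stay referenced for as long as that pointer may be dereferenced.

    import ctypes

    import numpy as np

    # Hypothetical struct dtype, analogous to the Pixel example above.
    dtype = np.dtype([("r", np.int32), ("g", np.int32), ("b", np.int32)])

    # One-element structured array, built the way the new __post_init__
    # builds it from the dataclass fields.
    data = np.array([(0, 1, 2)], dtype=dtype)

    # Same cast as in to_cccl_value: the raw buffer address as a void pointer.
    # `data` must be kept alive while this pointer is in use.
    ptr = ctypes.cast(data.ctypes.data, ctypes.c_void_p)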