rapidsai · rapids-bot · Nov 18, 2024 · Nov 14, 2024 · Nov 14, 2024
@@ -51,7 +51,6 @@ set(cython_sources
     transform.pyx
     transpose.pyx
     types.pyx
-    unary.pyx
     utils.pyx
 )
 set(linked_libraries cudf::cudf)

@@ -36,7 +36,6 @@
     text,
     timezone,
     transpose,
-    unary,
 )
 
 MAX_COLUMN_SIZE = np.iinfo(np.int32).max

@@ -2,8 +2,12 @@
 
 from __future__ import annotations
 
+from typing import Literal
+
 from typing_extensions import Self
 
+import pylibcudf as plc
+
 from cudf._typing import Dtype, DtypeObj, ScalarLike
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase
@@ -71,3 +75,8 @@ class Column:
     # TODO: The val parameter should be Scalar, not ScalarLike
     @staticmethod
     def from_scalar(val: ScalarLike, size: int) -> ColumnBase: ...
+    @staticmethod
+    def from_pylibcudf(
+        col: plc.Column, data_ptr_exposed: bool = False
+    ) -> ColumnBase: ...
+    def to_pylibcudf(self, mode: Literal["read", "write"]) -> plc.Column: ...
@@ -0,0 +1,64 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pylibcudf as plc
+
+from cudf._lib.types import dtype_to_pylibcudf_type
+from cudf.api.types import is_decimal_dtype
+from cudf.core.buffer import acquire_spill_lock
+
+if TYPE_CHECKING:
+    from cudf._typing import Dtype
+    from cudf.core.column import ColumnBase
+
+
+@acquire_spill_lock()
+def unary_operation(
+    col: ColumnBase, op: plc.unary.UnaryOperator
+) -> ColumnBase:
+    """Run ``plc.unary.unary_operation`` with ``op`` on ``col``."""
+    plc_result = plc.unary.unary_operation(col.to_pylibcudf(mode="read"), op)
+    return type(col).from_pylibcudf(plc_result)
+
+
+@acquire_spill_lock()
+def is_null(col: ColumnBase) -> ColumnBase:
+    """Compute ``plc.unary.is_null`` for ``col`` and wrap it as a column."""
+    plc_mask = plc.unary.is_null(col.to_pylibcudf(mode="read"))
+    return type(col).from_pylibcudf(plc_mask)
+
+
+@acquire_spill_lock()
+def is_valid(col: ColumnBase) -> ColumnBase:
+    """Compute ``plc.unary.is_valid`` for ``col`` and wrap it as a column."""
+    plc_mask = plc.unary.is_valid(col.to_pylibcudf(mode="read"))
+    return type(col).from_pylibcudf(plc_mask)
+
+
+@acquire_spill_lock()
+def cast(col: ColumnBase, dtype: Dtype) -> ColumnBase:
+    """Cast ``col`` to ``dtype`` through ``plc.unary.cast``."""
+    source = col.to_pylibcudf(mode="read")
+    casted = plc.unary.cast(source, dtype_to_pylibcudf_type(dtype))
+    result = type(col).from_pylibcudf(casted)
+
+    # For decimal results, take the precision from the requested dtype.
+    if is_decimal_dtype(result.dtype):
+        result.dtype.precision = dtype.precision  # type: ignore[union-attr]
+    return result
+
+
+@acquire_spill_lock()
+def is_nan(col: ColumnBase) -> ColumnBase:
+    """Compute ``plc.unary.is_nan`` for ``col`` and wrap it as a column."""
+    plc_mask = plc.unary.is_nan(col.to_pylibcudf(mode="read"))
+    return type(col).from_pylibcudf(plc_mask)
+
+
+@acquire_spill_lock()
+def is_non_nan(col: ColumnBase) -> ColumnBase:
+    """Apply ``plc.unary.is_not_nan`` to ``col`` and wrap it as a column."""
+    plc_mask = plc.unary.is_not_nan(col.to_pylibcudf(mode="read"))
+    return type(col).from_pylibcudf(plc_mask)
@@ -14,6 +14,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib.transform import bools_to_mask
+from cudf.core._internals import unary
 from cudf.core.column import column
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import CategoricalDtype, IntervalDtype
@@ -1018,12 +1019,12 @@ def isnull(self) -> ColumnBase:
         """
         Identify missing values in a CategoricalColumn.
         """
-        result = libcudf.unary.is_null(self)
+        result = unary.is_null(self)
 
         if self.categories.dtype.kind == "f":
             # Need to consider `np.nan` values in case
             # of an underlying float column
-            categories = libcudf.unary.is_nan(self.categories)
+            categories = unary.is_nan(self.categories)
             if categories.any():
                 code = self._encode(np.nan)
                 result = result | (self.codes == cudf.Scalar(code))
@@ -1034,12 +1035,12 @@ def notnull(self) -> ColumnBase:
         """
         Identify non-missing values in a CategoricalColumn.
         """
-        result = libcudf.unary.is_valid(self)
+        result = unary.is_valid(self)
 
         if self.categories.dtype.kind == "f":
             # Need to consider `np.nan` values in case
             # of an underlying float column
-            categories = libcudf.unary.is_nan(self.categories)
+            categories = unary.is_nan(self.categories)
             if categories.any():
                 code = self._encode(np.nan)
                 result = result & (self.codes != cudf.Scalar(code))

@@ -47,6 +47,7 @@
     is_string_dtype,
 )
 from cudf.core._compat import PANDAS_GE_210
+from cudf.core._internals import unary
 from cudf.core._internals.timezones import get_compatible_timezone
 from cudf.core.abc import Serializable
 from cudf.core.buffer import (
@@ -713,12 +714,12 @@ def isnull(self) -> ColumnBase:
         if not self.has_nulls(include_nan=self.dtype.kind == "f"):
             return as_column(False, length=len(self))
 
-        result = libcudf.unary.is_null(self)
+        result = unary.is_null(self)
 
         if self.dtype.kind == "f":
             # Need to consider `np.nan` values in case
             # of a float column
-            result = result | libcudf.unary.is_nan(self)
+            result = result | unary.is_nan(self)
 
         return result
 
@@ -727,12 +728,12 @@ def notnull(self) -> ColumnBase:
         if not self.has_nulls(include_nan=self.dtype.kind == "f"):
             return as_column(True, length=len(self))
 
-        result = libcudf.unary.is_valid(self)
+        result = unary.is_valid(self)
 
         if self.dtype.kind == "f":
             # Need to consider `np.nan` values in case
             # of a float column
-            result = result & libcudf.unary.is_non_nan(self)
+            result = result & unary.is_non_nan(self)
 
         return result
 

@@ -19,6 +19,7 @@
 from cudf._lib.labeling import label_bins
 from cudf._lib.search import search_sorted
 from cudf.core._compat import PANDAS_GE_220
+from cudf.core._internals import unary
 from cudf.core._internals.timezones import (
     check_ambiguous_and_nonexistent,
     get_compatible_timezone,
@@ -487,7 +488,7 @@ def as_datetime_column(self, dtype: Dtype) -> DatetimeColumn:
                 "Cannot use .astype to convert from timezone-naive dtype to timezone-aware dtype. "
                 "Use tz_localize instead."
             )
-        return libcudf.unary.cast(self, dtype=dtype)
+        return unary.cast(self, dtype=dtype)  # type: ignore[return-value]
 
     def as_timedelta_column(self, dtype: Dtype) -> None:  # type: ignore[override]
         raise TypeError(

@@ -17,6 +17,7 @@
     from_decimal as cpp_from_decimal,
 )
 from cudf.api.types import is_scalar
+from cudf.core._internals import unary
 from cudf.core.buffer import as_buffer
 from cudf.core.column import ColumnBase
 from cudf.core.dtypes import (
@@ -85,7 +86,7 @@ def as_decimal_column(
 
         if dtype == self.dtype:
             return self
-        return libcudf.unary.cast(self, dtype)
+        return unary.cast(self, dtype)  # type: ignore[return-value]
 
     def as_string_column(self) -> cudf.core.column.StringColumn:
         if len(self) > 0:
@@ -232,7 +233,7 @@ def _decimal_quantile(
     def as_numerical_column(
         self, dtype: Dtype
     ) -> "cudf.core.column.NumericalColumn":
-        return libcudf.unary.cast(self, dtype)
+        return unary.cast(self, dtype)  # type: ignore[return-value]
 
 
 class Decimal32Column(DecimalBaseColumn):

@@ -14,6 +14,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf.api.types import is_integer, is_scalar
+from cudf.core._internals import unary
 from cudf.core.column import ColumnBase, as_column, column, string
 from cudf.core.dtypes import CategoricalDtype
 from cudf.core.mixins import BinaryOperand
@@ -125,7 +126,7 @@ def indices_of(self, value: ScalarLike) -> NumericalColumn:
             and self.dtype.kind in {"c", "f"}
             and np.isnan(value)
         ):
-            nan_col = libcudf.unary.is_nan(self)
+            nan_col = unary.is_nan(self)
             return nan_col.indices_of(True)
         else:
             return super().indices_of(value)
@@ -184,7 +185,7 @@ def unary_operator(self, unaryop: str | Callable) -> ColumnBase:
         unaryop = unaryop.upper()
         unaryop = _unaryop_map.get(unaryop, unaryop)
         unaryop = pylibcudf.unary.UnaryOperator[unaryop]
-        return libcudf.unary.unary_operation(self, unaryop)
+        return unary.unary_operation(self, unaryop)
 
     def __invert__(self):
         if self.dtype.kind in "ui":
@@ -388,13 +389,13 @@ def as_timedelta_column(
     def as_decimal_column(
         self, dtype: Dtype
     ) -> "cudf.core.column.DecimalBaseColumn":
-        return libcudf.unary.cast(self, dtype)
+        return unary.cast(self, dtype)  # type: ignore[return-value]
 
     def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
         dtype = cudf.dtype(dtype)
         if dtype == self.dtype:
             return self
-        return libcudf.unary.cast(self, dtype)
+        return unary.cast(self, dtype)  # type: ignore[return-value]
 
     def all(self, skipna: bool = True) -> bool:
         # If all entries are null the result is True, including when the column
@@ -421,7 +422,7 @@ def any(self, skipna: bool = True) -> bool:
     def nan_count(self) -> int:
         if self.dtype.kind != "f":
             return 0
-        nan_col = libcudf.unary.is_nan(self)
+        nan_col = unary.is_nan(self)
         return nan_col.sum()
 
     def _process_values_for_isin(

@@ -13,6 +13,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf.api.types import is_scalar
+from cudf.core._internals import unary
 from cudf.core.buffer import Buffer, acquire_spill_lock
 from cudf.core.column import ColumnBase, column, string
 from cudf.utils.dtypes import np_to_pa_dtype
@@ -304,7 +305,7 @@ def as_string_column(self) -> cudf.core.column.StringColumn:
     def as_timedelta_column(self, dtype: Dtype) -> TimeDeltaColumn:
         if dtype == self.dtype:
             return self
-        return libcudf.unary.cast(self, dtype=dtype)
+        return unary.cast(self, dtype=dtype)  # type: ignore[return-value]
 
     def mean(self, skipna=None) -> pd.Timedelta:
         return pd.Timedelta(

@@ -11,6 +11,7 @@
 from cudf import _lib as libcudf
 from cudf._lib import strings as libstrings
 from cudf.api.types import _is_non_decimal_numeric_dtype, is_string_dtype
+from cudf.core._internals import unary
 from cudf.core.column import as_column
 from cudf.core.dtypes import CategoricalDtype
 from cudf.core.index import ensure_index
@@ -171,7 +172,7 @@ def to_numeric(arg, errors="raise", downcast=None, dtype_backend=None):
             downcast_dtype = cudf.dtype(t)
             if downcast_dtype.itemsize <= col.dtype.itemsize:
                 if col.can_cast_safely(downcast_dtype):
-                    col = libcudf.unary.cast(col, downcast_dtype)
+                    col = unary.cast(col, downcast_dtype)
                     break
 
     if isinstance(arg, (cudf.Series, pd.Series)):

@@ -10,8 +10,8 @@
 from pandas import testing as tm
 
 import cudf
-from cudf._lib.unary import is_nan
 from cudf.api.types import is_numeric_dtype, is_string_dtype
+from cudf.core._internals.unary import is_nan
 from cudf.core.missing import NA, NaT