Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move cudf._lib.unary to cudf.core._internals #17318

Merged
merged 2 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ set(cython_sources
transform.pyx
transpose.pyx
types.pyx
unary.pyx
utils.pyx
)
set(linked_libraries cudf::cudf)
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
text,
timezone,
transpose,
unary,
)

MAX_COLUMN_SIZE = np.iinfo(np.int32).max
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

from __future__ import annotations

from typing import Literal

from typing_extensions import Self

import pylibcudf as plc

from cudf._typing import Dtype, DtypeObj, ScalarLike
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase
Expand Down Expand Up @@ -71,3 +75,8 @@ class Column:
# TODO: The val parameter should be Scalar, not ScalarLike
@staticmethod
def from_scalar(val: ScalarLike, size: int) -> ColumnBase: ...
# Static constructor stub: accepts a pylibcudf column plus an optional
# ``data_ptr_exposed`` flag (defaults to False) and is typed as returning a
# ColumnBase.
# NOTE(review): the exact meaning of ``data_ptr_exposed`` is not visible in
# this stub — confirm against the implementation.
@staticmethod
def from_pylibcudf(
col: plc.Column, data_ptr_exposed: bool = False
) -> ColumnBase: ...
# Stub: converts this column to a pylibcudf column; ``mode`` must be the
# literal "read" or "write".
# NOTE(review): how the two modes differ is not shown here — confirm against
# the implementation.
def to_pylibcudf(self, mode: Literal["read", "write"]) -> plc.Column: ...
60 changes: 0 additions & 60 deletions python/cudf/cudf/_lib/unary.pyx

This file was deleted.

64 changes: 64 additions & 0 deletions python/cudf/cudf/core/_internals/unary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
from __future__ import annotations

from typing import TYPE_CHECKING

import pylibcudf as plc

from cudf._lib.types import dtype_to_pylibcudf_type
from cudf.api.types import is_decimal_dtype
from cudf.core.buffer import acquire_spill_lock

if TYPE_CHECKING:
from cudf._typing import Dtype
from cudf.core.column import ColumnBase


@acquire_spill_lock()
def unary_operation(
    col: ColumnBase, op: plc.unary.UnaryOperator
) -> ColumnBase:
    """Apply the pylibcudf unary operator ``op`` to ``col``.

    The column is passed to ``plc.unary.unary_operation`` in "read" mode
    and the pylibcudf result is wrapped back into a column through
    ``type(col).from_pylibcudf``.
    """
    wrap = type(col).from_pylibcudf
    plc_input = col.to_pylibcudf(mode="read")
    plc_output = plc.unary.unary_operation(plc_input, op)
    return wrap(plc_output)


@acquire_spill_lock()
def is_null(col: ColumnBase) -> ColumnBase:
    """Return the result of ``plc.unary.is_null`` on ``col`` as a column.

    The input is converted with ``to_pylibcudf(mode="read")`` and the
    output is wrapped via ``type(col).from_pylibcudf``.
    """
    wrap = type(col).from_pylibcudf
    plc_input = col.to_pylibcudf(mode="read")
    null_mask = plc.unary.is_null(plc_input)
    return wrap(null_mask)


@acquire_spill_lock()
def is_valid(col: ColumnBase) -> ColumnBase:
    """Return the result of ``plc.unary.is_valid`` on ``col`` as a column.

    The input is converted with ``to_pylibcudf(mode="read")`` and the
    output is wrapped via ``type(col).from_pylibcudf``.
    """
    wrap = type(col).from_pylibcudf
    plc_input = col.to_pylibcudf(mode="read")
    valid_mask = plc.unary.is_valid(plc_input)
    return wrap(valid_mask)


@acquire_spill_lock()
def cast(col: ColumnBase, dtype: Dtype) -> ColumnBase:
    """Cast ``col`` to ``dtype`` through ``plc.unary.cast``.

    ``dtype`` is translated with ``dtype_to_pylibcudf_type`` before the
    call. If the resulting column has a decimal dtype, its ``precision``
    is overwritten with ``dtype.precision``.
    """
    wrap = type(col).from_pylibcudf
    plc_input = col.to_pylibcudf(mode="read")
    plc_target = dtype_to_pylibcudf_type(dtype)
    result = wrap(plc.unary.cast(plc_input, plc_target))

    if not is_decimal_dtype(result.dtype):
        return result

    # NOTE(review): presumably the precision is not carried through the
    # pylibcudf round trip, hence the explicit copy — confirm.
    result.dtype.precision = dtype.precision  # type: ignore[union-attr]
    return result


@acquire_spill_lock()
def is_nan(col: ColumnBase) -> ColumnBase:
    """Return the result of ``plc.unary.is_nan`` on ``col`` as a column.

    The input is converted with ``to_pylibcudf(mode="read")`` and the
    output is wrapped via ``type(col).from_pylibcudf``.
    """
    wrap = type(col).from_pylibcudf
    plc_input = col.to_pylibcudf(mode="read")
    nan_mask = plc.unary.is_nan(plc_input)
    return wrap(nan_mask)


@acquire_spill_lock()
def is_non_nan(col: ColumnBase) -> ColumnBase:
    """Return the result of ``plc.unary.is_not_nan`` on ``col`` as a column.

    The input is converted with ``to_pylibcudf(mode="read")`` and the
    output is wrapped via ``type(col).from_pylibcudf``.
    """
    wrap = type(col).from_pylibcudf
    plc_input = col.to_pylibcudf(mode="read")
    not_nan_mask = plc.unary.is_not_nan(plc_input)
    return wrap(not_nan_mask)
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib.transform import bools_to_mask
from cudf.core._internals import unary
from cudf.core.column import column
from cudf.core.column.methods import ColumnMethods
from cudf.core.dtypes import CategoricalDtype, IntervalDtype
Expand Down Expand Up @@ -1018,12 +1019,12 @@ def isnull(self) -> ColumnBase:
"""
Identify missing values in a CategoricalColumn.
"""
result = libcudf.unary.is_null(self)
result = unary.is_null(self)

if self.categories.dtype.kind == "f":
# Need to consider `np.nan` values in case
# of an underlying float column
categories = libcudf.unary.is_nan(self.categories)
categories = unary.is_nan(self.categories)
if categories.any():
code = self._encode(np.nan)
result = result | (self.codes == cudf.Scalar(code))
Expand All @@ -1034,12 +1035,12 @@ def notnull(self) -> ColumnBase:
"""
Identify non-missing values in a CategoricalColumn.
"""
result = libcudf.unary.is_valid(self)
result = unary.is_valid(self)

if self.categories.dtype.kind == "f":
# Need to consider `np.nan` values in case
# of an underlying float column
categories = libcudf.unary.is_nan(self.categories)
categories = unary.is_nan(self.categories)
if categories.any():
code = self._encode(np.nan)
result = result & (self.codes != cudf.Scalar(code))
Expand Down
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
is_string_dtype,
)
from cudf.core._compat import PANDAS_GE_210
from cudf.core._internals import unary
from cudf.core._internals.timezones import get_compatible_timezone
from cudf.core.abc import Serializable
from cudf.core.buffer import (
Expand Down Expand Up @@ -713,12 +714,12 @@ def isnull(self) -> ColumnBase:
if not self.has_nulls(include_nan=self.dtype.kind == "f"):
return as_column(False, length=len(self))

result = libcudf.unary.is_null(self)
result = unary.is_null(self)

if self.dtype.kind == "f":
# Need to consider `np.nan` values in case
# of a float column
result = result | libcudf.unary.is_nan(self)
result = result | unary.is_nan(self)

return result

Expand All @@ -727,12 +728,12 @@ def notnull(self) -> ColumnBase:
if not self.has_nulls(include_nan=self.dtype.kind == "f"):
return as_column(True, length=len(self))

result = libcudf.unary.is_valid(self)
result = unary.is_valid(self)

if self.dtype.kind == "f":
# Need to consider `np.nan` values in case
# of a float column
result = result & libcudf.unary.is_non_nan(self)
result = result & unary.is_non_nan(self)

return result

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from cudf._lib.labeling import label_bins
from cudf._lib.search import search_sorted
from cudf.core._compat import PANDAS_GE_220
from cudf.core._internals import unary
from cudf.core._internals.timezones import (
check_ambiguous_and_nonexistent,
get_compatible_timezone,
Expand Down Expand Up @@ -487,7 +488,7 @@ def as_datetime_column(self, dtype: Dtype) -> DatetimeColumn:
"Cannot use .astype to convert from timezone-naive dtype to timezone-aware dtype. "
"Use tz_localize instead."
)
return libcudf.unary.cast(self, dtype=dtype)
return unary.cast(self, dtype=dtype) # type: ignore[return-value]

def as_timedelta_column(self, dtype: Dtype) -> None: # type: ignore[override]
raise TypeError(
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from_decimal as cpp_from_decimal,
)
from cudf.api.types import is_scalar
from cudf.core._internals import unary
from cudf.core.buffer import as_buffer
from cudf.core.column import ColumnBase
from cudf.core.dtypes import (
Expand Down Expand Up @@ -85,7 +86,7 @@ def as_decimal_column(

if dtype == self.dtype:
return self
return libcudf.unary.cast(self, dtype)
return unary.cast(self, dtype) # type: ignore[return-value]

def as_string_column(self) -> cudf.core.column.StringColumn:
if len(self) > 0:
Expand Down Expand Up @@ -232,7 +233,7 @@ def _decimal_quantile(
def as_numerical_column(
self, dtype: Dtype
) -> "cudf.core.column.NumericalColumn":
return libcudf.unary.cast(self, dtype)
return unary.cast(self, dtype) # type: ignore[return-value]


class Decimal32Column(DecimalBaseColumn):
Expand Down
11 changes: 6 additions & 5 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import cudf
from cudf import _lib as libcudf
from cudf.api.types import is_integer, is_scalar
from cudf.core._internals import unary
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.core.dtypes import CategoricalDtype
from cudf.core.mixins import BinaryOperand
Expand Down Expand Up @@ -125,7 +126,7 @@ def indices_of(self, value: ScalarLike) -> NumericalColumn:
and self.dtype.kind in {"c", "f"}
and np.isnan(value)
):
nan_col = libcudf.unary.is_nan(self)
nan_col = unary.is_nan(self)
return nan_col.indices_of(True)
else:
return super().indices_of(value)
Expand Down Expand Up @@ -184,7 +185,7 @@ def unary_operator(self, unaryop: str | Callable) -> ColumnBase:
unaryop = unaryop.upper()
unaryop = _unaryop_map.get(unaryop, unaryop)
unaryop = pylibcudf.unary.UnaryOperator[unaryop]
return libcudf.unary.unary_operation(self, unaryop)
return unary.unary_operation(self, unaryop)

def __invert__(self):
if self.dtype.kind in "ui":
Expand Down Expand Up @@ -388,13 +389,13 @@ def as_timedelta_column(
def as_decimal_column(
self, dtype: Dtype
) -> "cudf.core.column.DecimalBaseColumn":
return libcudf.unary.cast(self, dtype)
return unary.cast(self, dtype) # type: ignore[return-value]

def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
dtype = cudf.dtype(dtype)
if dtype == self.dtype:
return self
return libcudf.unary.cast(self, dtype)
return unary.cast(self, dtype) # type: ignore[return-value]

def all(self, skipna: bool = True) -> bool:
# If all entries are null the result is True, including when the column
Expand All @@ -421,7 +422,7 @@ def any(self, skipna: bool = True) -> bool:
def nan_count(self) -> int:
if self.dtype.kind != "f":
return 0
nan_col = libcudf.unary.is_nan(self)
nan_col = unary.is_nan(self)
return nan_col.sum()

def _process_values_for_isin(
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import cudf
from cudf import _lib as libcudf
from cudf.api.types import is_scalar
from cudf.core._internals import unary
from cudf.core.buffer import Buffer, acquire_spill_lock
from cudf.core.column import ColumnBase, column, string
from cudf.utils.dtypes import np_to_pa_dtype
Expand Down Expand Up @@ -304,7 +305,7 @@ def as_string_column(self) -> cudf.core.column.StringColumn:
def as_timedelta_column(self, dtype: Dtype) -> TimeDeltaColumn:
if dtype == self.dtype:
return self
return libcudf.unary.cast(self, dtype=dtype)
return unary.cast(self, dtype=dtype) # type: ignore[return-value]

def mean(self, skipna=None) -> pd.Timedelta:
return pd.Timedelta(
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/tools/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cudf import _lib as libcudf
from cudf._lib import strings as libstrings
from cudf.api.types import _is_non_decimal_numeric_dtype, is_string_dtype
from cudf.core._internals import unary
from cudf.core.column import as_column
from cudf.core.dtypes import CategoricalDtype
from cudf.core.index import ensure_index
Expand Down Expand Up @@ -171,7 +172,7 @@ def to_numeric(arg, errors="raise", downcast=None, dtype_backend=None):
downcast_dtype = cudf.dtype(t)
if downcast_dtype.itemsize <= col.dtype.itemsize:
if col.can_cast_safely(downcast_dtype):
col = libcudf.unary.cast(col, downcast_dtype)
col = unary.cast(col, downcast_dtype)
break

if isinstance(arg, (cudf.Series, pd.Series)):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from pandas import testing as tm

import cudf
from cudf._lib.unary import is_nan
from cudf.api.types import is_numeric_dtype, is_string_dtype
from cudf.core._internals.unary import is_nan
from cudf.core.missing import NA, NaT


Expand Down
Loading