From 02c35bfe71abcc8f5889fbc54c4f4902d1d2a29b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 18 Nov 2024 10:48:07 -0800 Subject: [PATCH] Remove cudf._lib.quantiles in favor of inlining pylibcudf (#17347) Contributes to https://github.com/rapidsai/cudf/issues/17317 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/17347 --- python/cudf/cudf/_lib/CMakeLists.txt | 1 - python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/utils.pxd | 2 +- python/cudf/cudf/_lib/utils.pyx | 2 +- python/cudf/cudf/core/column/decimal.py | 14 -------------- python/cudf/cudf/core/column/numerical_base.py | 16 ++++++++++++---- python/cudf/cudf/core/frame.py | 13 +++++++++---- 7 files changed, 23 insertions(+), 26 deletions(-) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 13beec3c7f7..b274f771479 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -31,7 +31,6 @@ set(cython_sources orc.pyx parquet.pyx partitioning.pyx - quantiles.pyx reduce.pyx replace.pyx reshape.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index a63bc1a3d1c..8455db5d2b5 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -18,7 +18,6 @@ orc, parquet, partitioning, - quantiles, reduce, replace, reshape, diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index f273aeb4270..6db3036d514 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -18,5 +18,5 @@ cdef table_view table_view_from_columns(columns) except * cdef table_view table_view_from_table(tbl, ignore_index=*) except* cdef columns_from_unique_ptr(unique_ptr[table] c_tbl) cdef columns_from_table_view(table_view tv, object owners) -cdef columns_from_pylibcudf_table(tbl) +cpdef columns_from_pylibcudf_table(tbl) cdef _data_from_columns(columns, column_names, index_names=*) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 2ccc6ca34dc..244d7fdc006 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -229,7 +229,7 @@ cdef columns_from_unique_ptr( return columns -cdef columns_from_pylibcudf_table(tbl): +cpdef columns_from_pylibcudf_table(tbl): """Convert a pylibcudf table into list of columns. Parameters diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 540aa02b842..ce7aa91f775 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -3,7 +3,6 @@ from __future__ import annotations import warnings -from collections.abc import Sequence from decimal import Decimal from typing import TYPE_CHECKING, cast @@ -217,19 +216,6 @@ def normalize_binop_value(self, other): ) return NotImplemented - def _decimal_quantile( - self, q: float | Sequence[float], interpolation: str, exact: bool - ) -> ColumnBase: - quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q - # get sorted indices and exclude nulls - indices = libcudf.sort.order_by( - [self], [True], "first", stable=True - ).slice(self.null_count, len(self)) - result = libcudf.quantiles.quantile( - self, quant, interpolation, indices, exact - ) - return result._with_type_metadata(self.dtype) - def as_numerical_column( self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index f6ab91f2f01..6d639337401 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -7,9 +7,11 @@ import numpy as np +import pylibcudf as plc + import cudf from cudf import _lib as libcudf -from cudf.core.buffer import Buffer +from cudf.core.buffer import Buffer, acquire_spill_lock from cudf.core.column import ColumnBase from cudf.core.missing import NA from cudf.core.mixins import Scannable @@ -145,9 +147,15 @@ def quantile( indices = libcudf.sort.order_by( [self], [True], "first", stable=True ).slice(self.null_count, len(self)) - result = libcudf.quantiles.quantile( - self, q, interpolation, indices, exact - ) + with acquire_spill_lock(): + plc_column = plc.quantiles.quantile( + self.to_pylibcudf(mode="read"), + q, + plc.types.Interpolation[interpolation.upper()], + indices.to_pylibcudf(mode="read"), + exact, + ) + result = type(self).from_pylibcudf(plc_column) # type: ignore[assignment] if return_scalar: scalar_result = result.element_indexing(0) if interpolation in {"lower", "higher", "nearest"}: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 2b4a17f9559..30868924bcd 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -799,15 +799,20 @@ def _quantile_table( null_precedence = [plc.types.NullOrder[key] for key in null_precedence] - return self._from_columns_like_self( - libcudf.quantiles.quantile_table( - [*self._columns], + with acquire_spill_lock(): + plc_table = plc.quantiles.quantiles( + plc.Table( + [c.to_pylibcudf(mode="read") for c in self._columns] + ), q, interpolation, is_sorted, column_order, null_precedence, - ), + ) + columns = libcudf.utils.columns_from_pylibcudf_table(plc_table) + return self._from_columns_like_self( + columns, column_names=self._column_names, )