Fixes for CPU package (#5599)
This PR fixes a few issues encountered while testing the different supported algorithms:

- Add pandas as a runtime dependency (the GPU package already pulls it in through cuDF).
- Use np.dtype instead of cp.dtype (they are the same object), and make other similar changes so that cudf/cupy/numba calls are not triggered at import time, which caused runtime issues in some algorithms; the pattern is sketched after this list.
- Other small fixes for issues that could surface at runtime.
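
The recurring fix for the import-time failures, sketched below assuming a CPU-only install: GPU-only modules are routed through cuml.internals.safe_imports, which hands back a placeholder object, and any module-level use of that placeholder is wrapped so a missing GPU stack raises UnavailableError instead of breaking import. This mirrors the type_utils.py hunk further down:

# Minimal sketch of the deferred-import pattern applied throughout this PR,
# assuming a CPU-only environment.
from cuml.internals.safe_imports import gpu_only_import, UnavailableError

cp = gpu_only_import("cupy")  # placeholder object when CuPy is unavailable

try:
    # Touching the placeholder (attribute access) fails here, inside the
    # guard, rather than at `import cuml` time.
    CUPY_SPARSE_DTYPES = [cp.float32, cp.float64, cp.complex64, cp.complex128]
except UnavailableError:
    CUPY_SPARSE_DTYPES = []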

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)
  - William Hicks (https://github.com/wphicks)

URL: #5599
dantegd authored Oct 4, 2023
Parent: 2299144 · Commit: cc22891
Showing 13 changed files with 83 additions and 49 deletions.
3 changes: 2 additions & 1 deletion conda/recipes/cuml-cpu/meta.yaml
@@ -34,14 +34,15 @@ requirements:
   run:
     - python x.x
     - numpy
+    - pandas
     - scikit-learn=1.2
     - hdbscan<=0.8.30
     - umap-learn=0.5.3
     - nvtx
 
 tests:  # [linux64]
   imports:  # [linux64]
-    - cuml-cpu  # [linux64]
+    - cuml  # [linux64]
 
 about:
   home: http://rapids.ai/
3 changes: 0 additions & 3 deletions python/cuml/cluster/__init__.py
@@ -14,9 +14,6 @@
 # limitations under the License.
 #
 
-from cuml.cluster.dbscan import DBSCAN
-from cuml.cluster.kmeans import KMeans
-from cuml.cluster.agglomerative import AgglomerativeClustering
 from cuml.internals.device_support import GPU_ENABLED
 
 from cuml.cluster.hdbscan import HDBSCAN
4 changes: 3 additions & 1 deletion python/cuml/common/__init__.py
@@ -17,6 +17,7 @@
 # from cuml.internals.array import CumlArray
 # from cuml.internals.array_sparse import SparseCumlArray
 
+from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.array import CumlArray
 from cuml.internals.array_sparse import SparseCumlArray
 
@@ -39,7 +40,8 @@
 from cuml.common.device_selection import using_device_type
 
 
-from cuml.common.pointer_utils import device_of_gpu_matrix
+if is_cuda_available():
+    from cuml.common.pointer_utils import device_of_gpu_matrix
 
 # legacy to be removed after complete CumlAray migration
 
4 changes: 0 additions & 4 deletions python/cuml/common/kernel_utils.py
@@ -26,10 +26,6 @@

 # Mapping of common PyData dtypes to their corresponding C-primitive
 dtype_str_map = {
-    cp.dtype("float32"): "float",
-    cp.dtype("float64"): "double",
-    cp.dtype("int32"): "int",
-    cp.dtype("int64"): "long long int",
     np.dtype("float32"): "float",
     np.dtype("float64"): "double",
     np.dtype("int32"): "int",
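
Why dropping the cp.dtype keys is safe: CuPy re-exports NumPy's dtype, so the deleted keys were exact duplicates of the np.dtype keys that remain. A quick check, on a machine where cupy imports:

import numpy as np
import cupy as cp  # requires a GPU install

assert cp.dtype is np.dtype                         # same object, as the PR notes
assert cp.dtype("float32") == np.dtype("float32")   # identical dict keys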
2 changes: 1 addition & 1 deletion python/cuml/common/sparsefuncs.py
@@ -141,7 +141,7 @@ def csr_diag_mul(X, y, inplace=True):

 @cuml.internals.api_return_any()
 def create_csr_matrix_from_count_df(
-    count_df, empty_doc_ids, n_doc, n_features, dtype=cp.float32
+    count_df, empty_doc_ids, n_doc, n_features, dtype=np.float32
 ):
     """
     Create a sparse matrix from the count of tokens by document
6 changes: 5 additions & 1 deletion python/cuml/internals/__init__.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 
+from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.base_helpers import BaseMetaClass, _tags_class_and_instance
 from cuml.internals.api_decorators import (
     _deprecate_pos_args,
@@ -36,5 +37,8 @@
     set_api_output_dtype,
     set_api_output_type,
 )
-from cuml.internals.internals import GraphBasedDimRedCallback
+
+if is_cuda_available():
+    from cuml.internals.internals import GraphBasedDimRedCallback
 
 from cuml.internals.constants import CUML_WRAPPED_FLAG
37 changes: 23 additions & 14 deletions python/cuml/internals/base.pyx
@@ -30,10 +30,8 @@ nvtx_annotate = gpu_only_import_from("nvtx", "annotate", alt=null_decorator)

 import cuml
 import cuml.common
-import cuml.common.cuda
 import cuml.internals.logger as logger
 import cuml.internals
-import pylibraft.common.handle
 import cuml.internals.input_utils
 from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.device_type import DeviceType
@@ -61,6 +59,11 @@ cp_ndarray = gpu_only_import_from('cupy', 'ndarray')
 cp = gpu_only_import('cupy')
 
 
+IF GPUBUILD == 1:
+    import pylibraft.common.handle
+    import cuml.common.cuda
+
+
 class Base(TagsMixin,
            metaclass=cuml.internals.BaseMetaClass):
     """
@@ -178,7 +181,7 @@ class Base(TagsMixin,
     # stream and handle example:
-    stream = cuml.cuda.Stream()
+    stream = cuml.common.cuda.Stream()
     handle = pylibraft.common.Handle(stream=stream)
     algo = MyAlgo(handle=handle)
@@ -201,17 +204,23 @@
         Constructor. All children must call init method of this base class.
         """
-        self.handle = pylibraft.common.handle.Handle() if handle is None \
-            else handle
-
-        # Internally, self.verbose follows the spdlog/c++ standard of
-        # 0 is most logging, and logging decreases from there.
-        # So if the user passes an int value for logging, we convert it.
-        if verbose is True:
-            self.verbose = logger.level_debug
-        elif verbose is False:
-            self.verbose = logger.level_info
-        else:
+        IF GPUBUILD == 1:
+            self.handle = pylibraft.common.handle.Handle() if handle is None \
+                else handle
+        ELSE:
+            self.handle = None
+
+        IF GPUBUILD == 1:
+            # Internally, self.verbose follows the spdlog/c++ standard of
+            # 0 is most logging, and logging decreases from there.
+            # So if the user passes an int value for logging, we convert it.
+            if verbose is True:
+                self.verbose = logger.level_debug
+            elif verbose is False:
+                self.verbose = logger.level_info
+            else:
+                self.verbose = verbose
+        ELSE:
             self.verbose = verbose
 
         self.output_type = _check_output_type_str(
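
Note the difference between the two guards used in this file: is_cuda_available() is evaluated at runtime, while Cython's IF GPUBUILD == 1 is resolved when the .pyx is translated to C, so the pylibraft and cuml.common.cuda imports are absent from the compiled CPU extension altogether. A rough sketch of how such a flag is typically injected; the actual build wiring in cuml may differ:

# setup.py fragment -- hypothetical, for illustration only.
from Cython.Build import cythonize

ext_modules = cythonize(
    "python/cuml/internals/base.pyx",
    # IF/ELSE blocks are evaluated against this at C-generation time:
    compile_time_env={"GPUBUILD": 0},  # 0 for cuml-cpu, 1 for the GPU build
)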
2 changes: 0 additions & 2 deletions python/cuml/internals/global_settings.py
@@ -18,7 +18,6 @@
 import threading
 from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.device_type import DeviceType
-from cuml.internals.logger import warn
 from cuml.internals.mem_type import MemoryType
 from cuml.internals.safe_imports import cpu_only_import, gpu_only_import
 
@@ -38,7 +37,6 @@ def __init__(self):
             default_device_type = DeviceType.device
             default_memory_type = MemoryType.device
         else:
-            warn("GPU will not be used")
             default_device_type = DeviceType.host
             default_memory_type = MemoryType.host
         self.shared_state = {
27 changes: 19 additions & 8 deletions python/cuml/internals/input_utils.py
@@ -71,24 +71,35 @@
     CumlArray: "cuml",
     SparseCumlArray: "cuml",
     np_ndarray: "numpy",
-    cp_ndarray: "cupy",
-    CudfSeries: "cudf",
-    CudfDataFrame: "cudf",
     PandasSeries: "pandas",
     PandasDataFrame: "pandas",
-    NumbaDeviceNDArrayBase: "numba",
 }
 
 
+try:
+    _input_type_to_str[cp_ndarray] = "cupy"
+    _input_type_to_str[CudfSeries] = "cudf"
+    _input_type_to_str[CudfDataFrame] = "cudf"
+    _input_type_to_str[NumbaDeviceNDArrayBase] = "numba"
+except UnavailableError:
+    pass
 
 
 _input_type_to_mem_type = {
     np_ndarray: MemoryType.host,
-    cp_ndarray: MemoryType.device,
-    CudfSeries: MemoryType.device,
-    CudfDataFrame: MemoryType.device,
     PandasSeries: MemoryType.host,
     PandasDataFrame: MemoryType.host,
-    NumbaDeviceNDArrayBase: MemoryType.device,
 }
 
 
+try:
+    _input_type_to_mem_type[cp_ndarray] = MemoryType.device
+    _input_type_to_mem_type[CudfSeries] = MemoryType.device
+    _input_type_to_mem_type[CudfDataFrame] = MemoryType.device
+    _input_type_to_mem_type[NumbaDeviceNDArrayBase] = MemoryType.device
+except UnavailableError:
+    pass
 
 _SPARSE_TYPES = [SparseCumlArray]
 
 try:
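
The reason the dictionaries are now populated in a guarded second step: on a CPU-only install, cp_ndarray, CudfSeries, CudfDataFrame, and NumbaDeviceNDArrayBase are placeholders from safe_imports, and using one (here, as a dict key) raises UnavailableError, which previously happened at import time. A hypothetical CPU-only session:

import numpy as np
from cuml.internals.safe_imports import gpu_only_import_from, UnavailableError

cp_ndarray = gpu_only_import_from("cupy", "ndarray")  # placeholder without CuPy

_input_type_to_str = {np.ndarray: "numpy"}
try:
    _input_type_to_str[cp_ndarray] = "cupy"  # using the placeholder raises here
except UnavailableError:
    pass  # the dict keeps only host-side entries on CPU-only installs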
10 changes: 7 additions & 3 deletions python/cuml/internals/type_utils.py
@@ -16,12 +16,16 @@
 import functools
 import typing
 
-from cuml.internals.safe_imports import gpu_only_import
+from cuml.internals.safe_imports import gpu_only_import, UnavailableError
 
 cp = gpu_only_import("cupy")
 
-# Those are the only data types supported by cupyx.scipy.sparse matrices.
-CUPY_SPARSE_DTYPES = [cp.float32, cp.float64, cp.complex64, cp.complex128]
+
+try:
+    # Those are the only data types supported by cupyx.scipy.sparse matrices.
+    CUPY_SPARSE_DTYPES = [cp.float32, cp.float64, cp.complex64, cp.complex128]
+except UnavailableError:
+    CUPY_SPARSE_DTYPES = []

# Use _DecoratorType as a type variable for decorators. See:
# https://github.com/python/mypy/pull/8336/files#diff-eb668b35b7c0c4f88822160f3ca4c111f444c88a38a3b9df9bb8427131538f9cR260
8 changes: 5 additions & 3 deletions python/cuml/linear_model/__init__.py
@@ -14,11 +14,13 @@
 # limitations under the License.
 #
 
 
+from cuml.internals.device_support import GPU_ENABLED
 from cuml.linear_model.elastic_net import ElasticNet
 from cuml.linear_model.lasso import Lasso
 from cuml.linear_model.linear_regression import LinearRegression
 from cuml.linear_model.logistic_regression import LogisticRegression
-from cuml.linear_model.mbsgd_classifier import MBSGDClassifier
-from cuml.linear_model.mbsgd_regressor import MBSGDRegressor
 from cuml.linear_model.ridge import Ridge
 
+if GPU_ENABLED:
+    from cuml.linear_model.mbsgd_classifier import MBSGDClassifier
+    from cuml.linear_model.mbsgd_regressor import MBSGDRegressor
5 changes: 4 additions & 1 deletion python/cuml/manifold/__init__.py
@@ -14,5 +14,8 @@
 # limitations under the License.
 #
 
+from cuml.internals.available_devices import is_cuda_available
 from cuml.manifold.umap import UMAP
-from cuml.manifold.t_sne import TSNE
+
+if is_cuda_available():
+    from cuml.manifold.t_sne import TSNE
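
One consequence of the guarded import worth noting: on a CPU-only install, cuml.manifold simply does not expose TSNE, while UMAP remains importable. Downstream code meant to run on both packages can feature-detect it, for example:

import cuml.manifold

if hasattr(cuml.manifold, "TSNE"):
    tsne = cuml.manifold.TSNE(n_components=2)  # GPU build only
else:
    print("TSNE is unavailable in cuml-cpu")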
21 changes: 14 additions & 7 deletions python/cuml/manifold/umap.pyx
@@ -36,19 +36,13 @@ import cuml.internals
 from cuml.internals.base import UniversalBase
 from cuml.common.doc_utils import generate_docstring
 from cuml.internals import logger
+from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.input_utils import input_to_cuml_array
 from cuml.internals.array import CumlArray
 from cuml.internals.array_sparse import SparseCumlArray
 from cuml.internals.mixins import CMajorInputTagMixin
 from cuml.common.sparse_utils import is_sparse
 
-from cuml.manifold.simpl_set import fuzzy_simplicial_set  # no-cython-lint
-from cuml.manifold.simpl_set import simplicial_set_embedding  # no-cython-lint
-# TODO: These two symbols are considered part of the public API of this module
-# which is why imports should not be removed. The no-cython-lint markers can be
-# replaced with an explicit __all__ specifications once
-# https://github.com/MarcoGorelli/cython-lint/issues/80 is resolved.
-
 from cuml.common.array_descriptor import CumlArrayDescriptor
 from cuml.internals.api_decorators import device_interop_preparation
 from cuml.internals.api_decorators import enable_device_interop
@@ -58,6 +52,19 @@ rmm = gpu_only_import('rmm')
 from libc.stdint cimport uintptr_t
 
 
+if is_cuda_available():
+    from cuml.manifold.simpl_set import fuzzy_simplicial_set  # no-cython-lint
+    from cuml.manifold.simpl_set import simplicial_set_embedding  # no-cython-lint
+    # TODO: These two symbols are considered part of the public API of this module
+    # which is why imports should not be removed. The no-cython-lint markers can be
+    # replaced with an explicit __all__ specifications once
+    # https://github.com/MarcoGorelli/cython-lint/issues/80 is resolved.
+else:
+    # if no GPU is present, we import the UMAP equivalents
+    from umap.umap_ import fuzzy_simplicial_set  # no-cython-lint
+    from umap.umap_ import simplicial_set_embedding  # no-cython-lint
+
+
 IF GPUBUILD == 1:
     from libc.stdlib cimport free
     from cuml.manifold.umap_utils cimport *
