diff --git a/examples/pykokkos/multi_gpu.py b/examples/pykokkos/multi_gpu.py
new file mode 100644
index 00000000..d20fd2c6
--- /dev/null
+++ b/examples/pykokkos/multi_gpu.py
@@ -0,0 +1,68 @@
+"""
+Multi-GPU example: run the same workunits on two CUDA devices from
+one process, selecting the active device with pk.set_device_id().
+"""
+
+import pykokkos as pk
+
+import numpy as np
+import cupy as cp
+
+pk.set_default_space(pk.Cuda)
+
+size = 10000
+
+# Create one CuPy array while each device is selected
+pk.set_device_id(0)
+cp_arr_0 = cp.arange(size).astype(np.int32)
+
+pk.set_device_id(1)
+cp_arr_1 = cp.arange(size).astype(np.int32)
+
+print(cp_arr_0.device)
+print(cp_arr_1.device)
+
+@pk.workunit(cp_arr = pk.ViewTypeInfo(space=pk.CudaSpace))
+def reduction_cp(i: int, acc: pk.Acc[int], cp_arr: pk.View1D[int]):
+    acc += cp_arr[i]
+
+# Reduce each array with its own device selected; the variable names
+# and the printed labels carry the index of that device
+pk.set_device_id(1)
+cp_view_1 = pk.from_cupy(cp_arr_1)
+result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_1)
+
+pk.set_device_id(0)
+cp_view_0 = pk.from_cupy(cp_arr_0)
+result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_0)
+
+print(f"Reducing array 0: {result_0}")
+print(f"Reducing array 1: {result_1}")
+print(f"Sum: {result_0 + result_1}")
+
+# Repeat with PyKokkos views created while each device is selected
+pk.set_device_id(0)
+view_0 = pk.View((size,), dtype=int)
+
+pk.set_device_id(1)
+view_1 = pk.View((size,), dtype=int)
+
+@pk.workunit
+def init_view(i: int, view: pk.View1D[int]):
+    view[i] = i
+
+@pk.workunit
+def reduce_view(i: int, acc: pk.Acc[int], view: pk.View1D[int]):
+    acc += view[i]
+
+pk.set_device_id(0)
+pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_0)
+result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_0)
+
+pk.set_device_id(1)
+pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_1)
+result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_1)
+
+print(f"Reducing view 0: {result_0}")
+print(f"Reducing view 1: {result_1}")
+print(f"Sum: {result_0 + result_1}")
diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py
index f2947662..40df672c 100644
--- a/pykokkos/__init__.py
+++ b/pykokkos/__init__.py @@ -8,7 +8,8 @@ initialize, finalize, get_default_space, set_default_space, get_default_precision, set_default_precision, - is_uvm_enabled, enable_uvm, disable_uvm + is_uvm_enabled, enable_uvm, disable_uvm, + set_device_id ) initialize() diff --git a/pykokkos/core/compile.sh b/pykokkos/core/compile.sh index cee2c609..1b96e118 100755 --- a/pykokkos/core/compile.sh +++ b/pykokkos/core/compile.sh @@ -9,6 +9,8 @@ PK_REAL="${6}" KOKKOS_LIB_PATH="${7}" KOKKOS_INCLUDE_PATH="${8}" COMPUTE_CAPABILITY="${9}" +LIB_SUFFIX="${10}" +COMPILER_PATH="${11}" SRC=$(find -name "*.cpp") @@ -34,11 +36,11 @@ if [ "${COMPILER}" == "g++" ]; then -shared \ -fopenmp \ "${SRC}".o -o "${MODULE}" \ - "${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \ - "${KOKKOS_LIB_PATH}/libkokkoscore.so" + "${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \ + "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" elif [ "${COMPILER}" == "nvcc" ]; then - "${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \ + "${COMPILER_PATH}" \ `python3 -m pybind11 --includes` \ -I.. \ -O3 \ @@ -54,7 +56,7 @@ elif [ "${COMPILER}" == "nvcc" ]; then -Dpk_exec_space="Kokkos::${EXEC_SPACE}" \ -Dpk_real="${PK_REAL}" - "${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \ + "${COMPILER_PATH}" \ -I.. 
\ -O3 \ -shared \ @@ -62,6 +64,6 @@ elif [ "${COMPILER}" == "nvcc" ]; then --expt-extended-lambda \ -fopenmp \ "${SRC}".o -o "${MODULE}" \ - "${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \ - "${KOKKOS_LIB_PATH}/libkokkoscore.so" + "${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \ + "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" fi \ No newline at end of file diff --git a/pykokkos/core/compiler.py b/pykokkos/core/compiler.py index 4c985f11..d03f9d5c 100644 --- a/pykokkos/core/compiler.py +++ b/pykokkos/core/compiler.py @@ -178,7 +178,7 @@ def compile_entity( if module_setup.is_compiled(): return - cpp_setup = CppSetup(module_setup.module_file, self.functor_file, self.bindings_file) + cpp_setup = CppSetup(module_setup.module_file, module_setup.gpu_module_files, self.functor_file, self.bindings_file) translator = StaticTranslator(module_setup.name, self.functor_file, members) t_start: float = time.perf_counter() diff --git a/pykokkos/core/cpp_setup.py b/pykokkos/core/cpp_setup.py index 59951936..cb0e49b0 100644 --- a/pykokkos/core/cpp_setup.py +++ b/pykokkos/core/cpp_setup.py @@ -3,10 +3,13 @@ import shutil import subprocess import sys +from types import ModuleType from typing import List, Tuple - -from pykokkos.interface import ExecutionSpace, get_default_layout, get_default_memory_space +from pykokkos.interface import ( + ExecutionSpace, get_default_layout, get_default_memory_space, + is_host_execution_space +) import pykokkos.kokkos_manager as km @@ -15,16 +18,18 @@ class CppSetup: Creates the directory to hold the translation and invokes the compiler """ - def __init__(self, module_file: str, functor: str, bindings: str): + def __init__(self, module_file: str, gpu_module_files: List[str], functor: str, bindings: str): """ CppSetup constructor :param module: the name of the file containing the compiled Python module + :param gpu_module_files: the list of names of files containing for each gpu module :param functor: the name of the generated functor file 
:param bindings: the name of the generated bindings file """ self.module_file: str = module_file + self.gpu_module_files: List[str] = gpu_module_files self.functor_file: str = functor self.bindings_file: str = bindings @@ -58,6 +63,8 @@ def compile( self.write_source(output_dir, functor, bindings) self.copy_script(output_dir) self.invoke_script(output_dir, space, enable_uvm, compiler) + if space is ExecutionSpace.Cuda and km.is_multi_gpu_enabled(): + self.copy_multi_gpu_kernel(output_dir) def initialize_directory(self, name: Path) -> None: @@ -115,15 +122,17 @@ def copy_script(self, output_dir: Path) -> None: print(f"Exception while copying views and makefile: {ex}") sys.exit(1) - def get_kokkos_paths(self) -> Tuple[Path, Path]: + def get_kokkos_paths(self, space: ExecutionSpace, compiler: str) -> Tuple[Path, Path, Path]: """ Get the paths of the Kokkos instal lib and include directories. If the environment variable is set, use that - Kokkos install. If not, fall back to installed pykokkos-base - package. + Kokkos install. If not, fall back to the installed + pykokkos-base package. 
- :returns: a tuple of paths to the Kokkos lib/ and include/ - directories respectively + :param space: the execution space to compile for + :param compiler: what compiler to use + :returns: a tuple of paths to the Kokkos lib/, include/, + and compiler to be used """ lib_path: Path @@ -139,20 +148,46 @@ def get_kokkos_paths(self) -> Tuple[Path, Path]: return lib_path, include_path - from pykokkos.bindings import kokkos - install_path = Path(kokkos.__path__[0]).parent + is_cpu: bool = is_host_execution_space(space) + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + install_path = Path(kokkos_lib.__path__[0]) + lib_parent_path: Path + if km.is_multi_gpu_enabled(): + lib_parent_path = install_path + else: + lib_parent_path = install_path.parent - if (install_path / "lib").is_dir(): - lib_path = install_path / "lib" - elif (install_path / "lib64").is_dir(): - lib_path = install_path / "lib64" + if (lib_parent_path / "lib").is_dir(): + lib_path = lib_parent_path / "lib" + elif (lib_parent_path / "lib64").is_dir(): + lib_path = lib_parent_path / "lib64" else: raise RuntimeError("lib/ or lib64/ directories not found in installed pykokkos-base package." 
f" Try setting {self.lib_path_env} instead.") - include_path = lib_path.parent / "include/kokkos" + include_path = install_path.parent / "include/kokkos" + + compiler_path: Path + if compiler != "nvcc": + compiler_path = Path("g++") + else: + compiler_path = install_path.parent / "bin/nvcc_wrapper" + + return lib_path, include_path, compiler_path + + def get_kokkos_lib_suffix(self, space: ExecutionSpace) -> str: + """ + Get the suffix of the libkokkoscore and libkokkoscontainers + libraries corresponding to the enabled device + + :param space: the execution space to compile for + :returns: the suffix as a string + """ + + if is_host_execution_space(space) or not km.is_multi_gpu_enabled(): + return "" - return lib_path, include_path + return f"_{km.get_device_id()}" def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: bool, compiler: str) -> None: """ @@ -176,8 +211,10 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo precision: str = km.get_default_precision().__name__.split(".")[-1] lib_path: Path include_path: Path - lib_path, include_path = self.get_kokkos_paths() + compiler_path: Path + lib_path, include_path, compiler_path = self.get_kokkos_paths(space, compiler) compute_capability: str = self.get_cuda_compute_capability(compiler) + lib_suffix: str = self.get_kokkos_lib_suffix(space) command: List[str] = [f"./{self.script}", compiler, # What compiler to use @@ -188,7 +225,9 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo precision, # Default real precision str(lib_path), # Path to Kokkos install lib/ directory str(include_path), # Path to Kokkos install include/ directory - compute_capability] # Device compute capability + compute_capability, # Device compute capability + lib_suffix, # The libkokkos* suffix identifying the gpu + str(compiler_path)] # The path to the compiler to use compile_result = subprocess.run(command, cwd=output_dir, capture_output=True, 
check=False)
         if compile_result.returncode != 0:
@@ -207,6 +246,68 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo
             print(f"patchelf failed")
             sys.exit(1)
 
+    def copy_multi_gpu_kernel(self, output_dir: Path) -> None:
+        """
+        Copy the kernel .so file once for each device and run patchelf
+        to point to the right library
+
+        Each copy gets its rpath set to the lib/ directory of the
+        matching gpu module, and its needed libkokkoscore and
+        libkokkoscontainers entries renamed to that device's suffix.
+        Exits the process if the copy or any patchelf call fails.
+
+        :param output_dir: the base directory
+        """
+
+        original_module: Path = output_dir / self.module_file
+        gpu_modules: List[ModuleType] = km.get_kokkos_gpu_modules()
+        for device_id, (kernel_filename, kokkos_gpu_module) in enumerate(zip(self.gpu_module_files, gpu_modules)):
+            kernel_path: Path = output_dir / kernel_filename
+
+            try:
+                shutil.copy(original_module, kernel_path)
+            except Exception as ex:
+                print(f"Exception while copying kernel: {ex}")
+                sys.exit(1)
+
+            lib_path: Path = Path(kokkos_gpu_module.__path__[0]) / "lib"
+            self._run_patchelf(["--set-rpath", str(lib_path), kernel_filename], output_dir)
+
+            # Now replace the needed libkokkos* libraries with the correct version
+            needed_libraries: str = self._run_patchelf(["--print-needed", kernel_filename], output_dir)
+
+            for line in needed_libraries.splitlines():
+                if "libkokkoscore" not in line and "libkokkoscontainers" not in line:
+                    continue
+
+                # Line will be of the form f"libkokkoscore_{id}.so.3.4".
+                # Split it into the library name, the id, and the rest
+                lib_name, _, suffixed = line.partition("_")
+                current_id, dot, extension = suffixed.partition(".")
+                if not current_id.isdigit() or int(current_id) == device_id:
+                    # No device suffix to rewrite, or already the right one
+                    continue
+
+                to_add: str = f"{lib_name}_{device_id}{dot}{extension}"
+                self._run_patchelf(["--replace-needed", line, to_add, kernel_filename], output_dir)
+
+    def _run_patchelf(self, args: List[str], output_dir: Path) -> str:
+        """
+        Run patchelf in the output directory and exit on failure
+
+        :param args: the arguments to pass to patchelf
+        :param output_dir: the directory to run patchelf in
+        :returns: the decoded standard output of patchelf
+        """
+
+        result = subprocess.run(["patchelf", *args], cwd=output_dir, capture_output=True, check=False)
+        if result.returncode != 0:
+            print(result.stderr.decode("utf-8"))
+            print(f"patchelf failed")
+            sys.exit(1)
+
+        return result.stdout.decode("utf-8")
+
     def get_cuda_compute_capability(self, compiler: str) -> str:
         """
         Get the compute capability of an Nvidia GPU
diff --git 
a/pykokkos/core/module_setup.py b/pykokkos/core/module_setup.py index 3f294d02..028cec92 100644 --- a/pykokkos/core/module_setup.py +++ b/pykokkos/core/module_setup.py @@ -5,7 +5,7 @@ import sys import sysconfig import time -from typing import Callable, Optional, Union +from typing import Callable, List, Optional, Union from pykokkos.interface import ExecutionSpace import pykokkos.kokkos_manager as km @@ -105,9 +105,15 @@ def __init__( self.main: Path = self.get_main_path() self.output_dir: Optional[Path] = self.get_output_dir(self.main, self.metadata, space) + self.gpu_module_files: List[str] = [] + if km.is_multi_gpu_enabled(): + self.gpu_module_files = [f"kernel{device_id}{suffix}" for device_id in range(km.get_num_gpus())] if self.output_dir is not None: self.path: str = os.path.join(self.output_dir, self.module_file) + if km.is_multi_gpu_enabled(): + self.gpu_module_paths: str = [os.path.join(self.output_dir, module_file) for module_file in self.gpu_module_files] + self.name: str = self.path.replace("/", "_") self.name: str = self.name.replace("-", "_") self.name: str = self.name.replace(".", "_") diff --git a/pykokkos/core/runtime.py b/pykokkos/core/runtime.py index d9f02a13..a8dbc57e 100644 --- a/pykokkos/core/runtime.py +++ b/pykokkos/core/runtime.py @@ -9,7 +9,8 @@ from pykokkos.core.visitors import visitors_util from pykokkos.interface import ( DataType, ExecutionPolicy, ExecutionSpace, MemorySpace, - RandomPool, RangePolicy, TeamPolicy, View, ViewType + RandomPool, RangePolicy, TeamPolicy, View, ViewType, + is_host_execution_space ) import pykokkos.kokkos_manager as km @@ -47,7 +48,7 @@ def run_workload(self, space: ExecutionSpace, workload: object) -> None: if members is None: raise RuntimeError("ERROR: members cannot be none") - self.execute(workload, module_setup, members) + self.execute(workload, module_setup, members, space) self.run_callbacks(workload, members) @@ -82,7 +83,7 @@ def run_workunit( if members is None: raise RuntimeError("ERROR: 
members cannot be none") - return self.execute(workunit, module_setup, members, policy=policy, name=name, **kwargs) + return self.execute(workunit, module_setup, members, policy.space, policy=policy, name=name, **kwargs) def is_debug(self, space: ExecutionSpace) -> bool: """ @@ -100,6 +101,7 @@ def execute( entity: Union[object, Callable[..., None]], module_setup: ModuleSetup, members: PyKokkosMembers, + space: ExecutionSpace, policy: Optional[ExecutionPolicy] = None, name: Optional[str] = None, **kwargs @@ -110,13 +112,21 @@ def execute( :param entity: the workload or workunit object :param module_path: the path to the compiled module :param members: a collection of PyKokkos related members + :param space: the execution space :param policy: the execution policy for workunits :param name: the name of the kernel :param kwargs: the keyword arguments passed to the workunit :returns: the result of the operation (None for "for" and workloads) """ - module = self.import_module(module_setup.name, module_setup.path) + module_path: str + if is_host_execution_space(space) or not km.is_multi_gpu_enabled(): + module_path = module_setup.path + else: + device_id: int = km.get_device_id() + module_path = module_setup.gpu_module_paths[device_id] + + module = self.import_module(module_setup.name, module_path) args: Dict[str, Any] = self.get_arguments(entity, members, policy, **kwargs) if name is None: @@ -141,12 +151,14 @@ def import_module(self, module_name: str, module_path: str): :returns: the imported module """ - if module_name in sys.modules: - return sys.modules[module_name] + hashed_name: str = module_name.replace("kernel", f"kernel_{km.get_device_id()}") + + if hashed_name in sys.modules: + return sys.modules[hashed_name] spec = importlib.util.spec_from_file_location(module_name, module_path) module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module + sys.modules[hashed_name] = module spec.loader.exec_module(module) return module diff --git 
a/pykokkos/core/translators/bindings.py b/pykokkos/core/translators/bindings.py index 74e889d0..0def6a82 100644 --- a/pykokkos/core/translators/bindings.py +++ b/pykokkos/core/translators/bindings.py @@ -269,7 +269,7 @@ def generate_call(operation: str, functor: str, members: PyKokkosMembers, tag: c if is_hierarchical: args.append(f"Kokkos::TeamPolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.LeagueSize.value},Kokkos::AUTO,{Keywords.VectorLength.value})") else: - args.append(f"Kokkos::RangePolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.ThreadsBegin.value},{Keywords.ThreadsEnd.value})") + args.append(f"Kokkos::RangePolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.DefaultExecSpaceInstance.value}, {Keywords.ThreadsBegin.value},{Keywords.ThreadsEnd.value})") args.append(Keywords.Instance.value) diff --git a/pykokkos/interface/__init__.py b/pykokkos/interface/__init__.py index 8c975b5f..48bdfdc2 100644 --- a/pykokkos/interface/__init__.py +++ b/pykokkos/interface/__init__.py @@ -27,7 +27,7 @@ ExecutionPolicy, RangePolicy, MDRangePolicy, TeamPolicy, TeamThreadRange, ThreadVectorRange, Iterate, Rank ) -from .execution_space import ExecutionSpace +from .execution_space import ExecutionSpace, is_host_execution_space from .layout import Layout, get_default_layout from .hierarchical import ( AUTO, TeamMember, PerTeam, PerThread, single diff --git a/pykokkos/interface/execution_space.py b/pykokkos/interface/execution_space.py index 0d31eae1..51aae703 100644 --- a/pykokkos/interface/execution_space.py +++ b/pykokkos/interface/execution_space.py @@ -1,5 +1,6 @@ from enum import Enum +import pykokkos.kokkos_manager as km class ExecutionSpace(Enum): Cuda = "Cuda" @@ -8,3 +9,16 @@ class ExecutionSpace(Enum): Serial = "Serial" Debug = "Debug" Default = "Default" + +def is_host_execution_space(space: ExecutionSpace) -> bool: + """ + Check if the supplied execution space runs on the host + + :param space: the 
space being checked + :returns: True if the space runs on the host + """ + + if space is ExecutionSpace.Default: + space = km.get_default_space() + + return space in {ExecutionSpace.OpenMP, ExecutionSpace.Pthreads, ExecutionSpace.Serial} \ No newline at end of file diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index 01e9ebd0..9b50d9c4 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -1,9 +1,9 @@ from __future__ import annotations import ctypes -import os import math from enum import Enum import sys +from types import ModuleType from typing import ( Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union @@ -225,7 +225,10 @@ def resize(self, dimension: int, size: int) -> None: shape_list[dimension] = size self.shape = tuple(shape_list) - self.array = kokkos.array( + + is_cpu: bool = self.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + self.array = kokkos_lib.array( "", self.shape, None, None, self.dtype.value, self.space.value, self.layout.value, self.trait.value) self.data = np.array(self.array, copy=False) @@ -284,6 +287,9 @@ def _init_view( self.layout: Layout = layout self.trait: Trait = trait + is_cpu: bool = self.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + if self.dtype == pk.float: self.dtype = DataType.float elif self.dtype == pk.double: @@ -294,11 +300,11 @@ def _init_view( # NumPy for now... 
self.array = array else: - self.array = kokkos.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) + self.array = kokkos_lib.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) else: if len(self.shape) == 0: shape = [1] - self.array = kokkos.array("", shape, None, None, self.dtype.value, space.value, layout.value, trait.value) + self.array = kokkos_lib.array("", shape, None, None, self.dtype.value, space.value, layout.value, trait.value) self.data = np.array(self.array, copy=False) def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: @@ -382,10 +388,15 @@ def __init__(self, parent_view: Union[Subview, View], data_slice: Union[slice, T self.data: np.ndarray = parent_view.data[data_slice] self.dtype = parent_view.dtype - self.array = kokkos.array( + + is_cpu: bool = self.parent_view.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + + self.array = kokkos_lib.array( self.data, dtype=parent_view.dtype.value, space=parent_view.space.value, layout=parent_view.layout.value, trait=kokkos.Unmanaged) self.shape: Tuple[int] = self.data.shape + if self.data.shape == (0,): self.data = np.array([], dtype=self.data.dtype) self.shape = () diff --git a/pykokkos/kokkos_manager/__init__.py b/pykokkos/kokkos_manager/__init__.py index 3d43647b..d68b1b44 100644 --- a/pykokkos/kokkos_manager/__init__.py +++ b/pykokkos/kokkos_manager/__init__.py @@ -1,5 +1,6 @@ import os -from typing import Any, Dict +from types import ModuleType +from typing import Any, Dict, List from pykokkos.bindings import kokkos from pykokkos.interface.execution_space import ExecutionSpace @@ -9,7 +10,12 @@ "EXECUTION_SPACE": ExecutionSpace.OpenMP, "REAL_DTYPE": double, "IS_INITIALIZED": False, - "ENABLE_UVM": False + "ENABLE_UVM": False, + "MULTI_GPU": False, + "NUM_GPUS": 0, + "KOKKOS_GPU_MODULE": kokkos, + "KOKKOS_GPU_MODULE_LIST": [], + "DEVICE_ID": 0 } def 
get_default_space() -> ExecutionSpace:
@@ -99,3 +105,124 @@ def finalize() -> None:
     if CONSTANTS["IS_INITIALIZED"] == True:
         kokkos.finalize()
         CONSTANTS["IS_INITIALIZED"] = False
+
+def get_kokkos_module(is_cpu: bool) -> ModuleType:
+    """
+    Get the current kokkos module
+
+    :param is_cpu: is the lib needed for cpu
+    :returns: the default kokkos module if is_cpu is True, otherwise
+        the module of the currently selected gpu
+    """
+
+    if is_cpu:
+        return kokkos
+
+    return CONSTANTS["KOKKOS_GPU_MODULE"]
+
+def set_device_id(device_id: int) -> None:
+    """
+    Set the current device ID
+
+    Returns without switching when exactly one gpu is configured.
+    Otherwise calls cupy.cuda.runtime.setDevice() and selects the
+    kokkos gpu module stored for that device.
+
+    :param device_id: the ID of the device to enable
+    :raises TypeError: if device_id is not an int
+    :raises RuntimeError: if device_id is not in [0..num_gpus)
+    """
+
+    if not isinstance(device_id, int):
+        raise TypeError("'device_id' must be of type 'int'")
+
+    num_gpus: int = CONSTANTS["NUM_GPUS"]
+    if device_id >= num_gpus or device_id < 0:
+        raise RuntimeError(f"Device {device_id} does not exist (range [0..{num_gpus})")
+
+    if num_gpus == 1:
+        return
+
+    import cupy
+    cupy.cuda.runtime.setDevice(device_id)
+    CONSTANTS["DEVICE_ID"] = device_id
+
+    gpu_lib = CONSTANTS["KOKKOS_GPU_MODULE_LIST"][device_id]
+    CONSTANTS["KOKKOS_GPU_MODULE"] = gpu_lib
+
+def get_device_id() -> int:
+    """
+    Get the ID of the currently enabled device
+
+    :returns: the ID of the enabled device
+    """
+
+    return CONSTANTS["DEVICE_ID"]
+
+def is_multi_gpu_enabled() -> bool:
+    """
+    Check if pykokkos has been configured for multi-gpu use
+
+    :returns: True or False
+    """
+
+    return CONSTANTS["MULTI_GPU"]
+
+def get_kokkos_gpu_modules() -> List[ModuleType]:
+    """
+    Get the pykokkos-base gpu modules
+
+    :returns: the list of modules
+    """
+
+    return CONSTANTS["KOKKOS_GPU_MODULE_LIST"]
+
+def get_num_gpus() -> int:
+    """
+    Get the number of gpus pykokkos has been configured for
+
+    :returns: the number of gpus
+    """
+
+    return CONSTANTS["NUM_GPUS"]
+
+try:
+    # Import multiple kokkos libs to support multiple devices per
+    # process. This assumes that there are modules named f"gpu{id}"
+    # that can be imported.
+    import atexit
+    import cupy as cp
+    import importlib
+    import sys
+
+    NUM_CUDA_GPUS: int = cp.cuda.runtime.getDeviceCount()
+    KOKKOS_LIBS: List[str] = [f"gpu{gpu_id}" for gpu_id in range(NUM_CUDA_GPUS)]
+
+    KOKKOS_LIB_INSTANCES: List[ModuleType] = []
+    for gpu_id, lib in enumerate(KOKKOS_LIBS):
+        module = importlib.import_module(lib)
+        KOKKOS_LIB_INSTANCES.append(module)
+
+        # Can't pass device id directly to initialize(), so need to
+        # append argument to select device to sys.argv.
+        # (see https://github.com/kokkos/pykokkos-base/blob/d3946ed56483f3cbe2e660cc50fe73c50dad19ea/src/libpykokkos.cpp#L65)
+        sys.argv.append(f"--device-id={gpu_id}")
+        try:
+            module.initialize()
+        finally:
+            # Restore sys.argv even if initialize() raises
+            sys.argv.pop()
+        atexit.register(module.finalize)
+
+    # Only publish the multi-gpu state once at least one module is ready,
+    # so MULTI_GPU is never True with an empty module list
+    if KOKKOS_LIB_INSTANCES:
+        CONSTANTS["KOKKOS_GPU_MODULE"] = KOKKOS_LIB_INSTANCES[0]
+        CONSTANTS["KOKKOS_GPU_MODULE_LIST"] = KOKKOS_LIB_INSTANCES
+        CONSTANTS["NUM_GPUS"] = NUM_CUDA_GPUS
+        CONSTANTS["MULTI_GPU"] = True
+
+except Exception:
+    # Best effort: if CuPy or the gpu{id} modules cannot be set up,
+    # keep the defaults already stored in CONSTANTS
+    pass