Skip to content

Commit

Permalink
only adding pids
Browse files (browse the repository at this point in the history)
  • Loading branch information
IlyasMoutawwakil committed Nov 14, 2023
1 parent 4767fd8 commit e04d256
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions optimum_benchmark/backends/isolation_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,10 @@ def only_this_process_is_running_on_cuda_devices(cuda_devices: List[int], benchm
nvml.nvmlInit()
for device_id in cuda_devices:
device_handle = nvml.nvmlDeviceGetHandleByIndex(device_id)
- pids[device_id] = set(nvml.nvmlDeviceGetComputeRunningProcesses(device_handle))
+ device_processes = nvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
+ for device_process in device_processes:
+ pids[device_id].add(device_process.pid)
+
nvml.nvmlShutdown()
elif is_rocm_system():
rocm_version = torch_version().split("rocm")[-1]
Expand Down Expand Up @@ -73,10 +76,8 @@ def only_this_process_is_running_on_cuda_devices(cuda_devices: List[int], benchm

if len(other_pids) > 0:
error_message = f"Expected only process {benchmark_pid} on device(s) {cuda_devices}, but found {other_pids}."
-
# for pid in other_pids:
# error_message += f"\nProcess {pid} info: {get_pid_info(pid)}"
-
raise RuntimeError(error_message)


Expand All @@ -88,7 +89,7 @@ def only_this_process_will_run_on_cuda_devices(cuda_devices: List[int], benchmar
try:
only_this_process_is_running_on_cuda_devices(cuda_devices, benchmark_pid)
time.sleep(0.1)
- except RuntimeError as exception:
+ except Exception as exception:
os.kill(benchmark_pid, signal.SIGTERM)
raise exception

Expand Down

0 comments on commit e04d256

Please sign in to comment.