From c2dcbc1bee4d026c46c5656645966afbeb852625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gordon=20J=2E=20K=C3=B6hn?= Date: Tue, 15 Oct 2024 10:16:04 +0200 Subject: [PATCH] Prevent Thread Oversubscription with threadpoolctl In a cluster setting, thread oversubscription can lead to significant performance degradation and resource contention for running the deconvolution with scipy.optimize. This commit addresses this issue by utilizing the `threadpoolctl` library to limit the number of BLAS threads to 1. This change ensures that each process uses only the allocated resources, preventing contention and improving overall cluster stability. --- lollipop/cli/deconvolute.py | 14 ++++++++++++-- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lollipop/cli/deconvolute.py b/lollipop/cli/deconvolute.py index b827b88..2362da9 100755 --- a/lollipop/cli/deconvolute.py +++ b/lollipop/cli/deconvolute.py @@ -14,6 +14,7 @@ import pandas as pd import numpy as np from tqdm import tqdm, trange +from threadpoolctl import ThreadpoolController import lollipop as ll @@ -147,6 +148,10 @@ def _deconvolute_bootstrap( The deconvolution results for the location and bootstrap iterations. 
""" + # monitor the number of threads, to prevent oversubscription on blas / cluster systmes + controller = ThreadpoolController() + logging.info(f"Threading configuration:\n {controller.info()}") + # deconvolution results deconv = [] @@ -232,7 +237,7 @@ def _deconvolute_bootstrap( # just run one on everything weights = {} - # deconvolution + # define deconvolution kernel t_kdec = ll.KernelDeconv( temp_df2[var_dates["var_dates"][mindate] + ["undetermined"]], temp_df2["frac"], @@ -242,7 +247,11 @@ def _deconvolute_bootstrap( confint=confint(**confint_params), **weights, ) - t_kdec = t_kdec.deconv_all(**deconv_params) + # limit the number of threads, to prevent oversubscription on blas / cluster systmes + with controller.limit(limits=1, user_api='blas'): + # do the deconvolution + t_kdec = t_kdec.deconv_all(**deconv_params) + if have_confint: # with conf int res = t_kdec.fitted.copy() @@ -389,6 +398,7 @@ def deconvolute( tally_data, namefield, ): + # load data yaml = ruamel.yaml.YAML(typ="rt") print("load data") diff --git a/pyproject.toml b/pyproject.toml index 20a3226..a7b4634 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ strictyaml = { version = ">=1.7", optional = true } tqdm = { version = ">=4.64", optional = true } click = { version = "^8.0", optional = true } click-option-group = { version = "^0.5", optional = true } +threadpoolctl = "^3.5.0" [tool.poetry.extras] cli = [ "zstandard", "ruamel.yaml", "strictyaml", "tqdm", "click", "click-option-group" ]