From f31de4a702d472a2bd1153e9ea10dbe1b26a7b21 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Wed, 15 May 2019 14:27:20 +0200 Subject: [PATCH 1/6] Changed prediction to run with multiprocessing --- sklearn_porter/Porter.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sklearn_porter/Porter.py b/sklearn_porter/Porter.py index 04792147..460ecec0 100644 --- a/sklearn_porter/Porter.py +++ b/sklearn_porter/Porter.py @@ -6,6 +6,8 @@ import numpy as np +from joblib import Parallel, delayed + from sklearn.metrics import accuracy_score from sklearn.tree.tree import DecisionTreeClassifier from sklearn.ensemble.weight_boosting import AdaBoostClassifier @@ -375,11 +377,12 @@ def predict(self, X, class_name=None, method_name=None, tnp_dir='tmp', # Multiple feature sets: if exec_cmd is not None and len(X.shape) > 1: - pred_y = np.empty(X.shape[0], dtype=int) - for idx, features in enumerate(X): - full_exec_cmd = exec_cmd + [str(f).strip() for f in features] - pred = Shell.check_output(full_exec_cmd, cwd=tnp_dir) - pred_y[idx] = int(pred) + cmds = [exec_cmd + [str(f).strip() for f in feat] for feat in X] + max_threads = 8 + # using threading will increase speed 8-fold + preds = Parallel(n_jobs=max_threads, backend='threading') \ + (delayed(Shell.check_output)(cmd, cwd=tnp_dir) for cmd in cmds) + pred_y = np.array([int(pred) for pred in preds], dtype=int) # Cleanup: if not keep_tmp_dir: From 13ab9b1ff4f671324621914702804626e7cd97b0 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Wed, 15 May 2019 14:44:05 +0200 Subject: [PATCH 2/6] Update Porter.py --- sklearn_porter/Porter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn_porter/Porter.py b/sklearn_porter/Porter.py index 460ecec0..845cd06e 100644 --- a/sklearn_porter/Porter.py +++ b/sklearn_porter/Porter.py @@ -3,6 +3,7 @@ import os import sys import types +import subprocess import numpy as np @@ -377,6 +378,8 @@ def predict(self, X, class_name=None, method_name=None, tnp_dir='tmp', # Multiple feature sets: if exec_cmd is not None and len(X.shape) > 1: + tnp_dir = './' + tnp_dir + exec_cmd = [os.path.join(os.path.abspath(tnp_dir), exec_cmd[0])] cmds = [exec_cmd + [str(f).strip() for f in feat] for feat in X] max_threads = 8 # using threading will increase speed 8-fold From b1423602828f01dc7270e757fed1595888e64d87 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Wed, 15 May 2019 15:13:56 +0200 Subject: [PATCH 3/6] Update Porter.py --- sklearn_porter/Porter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_porter/Porter.py b/sklearn_porter/Porter.py index 845cd06e..2d78f3c3 100644 --- a/sklearn_porter/Porter.py +++ b/sklearn_porter/Porter.py @@ -384,7 +384,7 @@ def predict(self, X, class_name=None, method_name=None, tnp_dir='tmp', max_threads = 8 # using threading will increase speed 8-fold preds = Parallel(n_jobs=max_threads, backend='threading') \ - (delayed(Shell.check_output)(cmd, cwd=tnp_dir) for cmd in cmds) + (delayed(subprocess.check_output)(cmd, cwd=tnp_dir) for cmd in cmds) pred_y = np.array([int(pred) for pred in preds], dtype=int) # Cleanup: From b96071fbe4dff672eebcb233d5ccacad7b040c03 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Wed, 15 May 2019 15:15:32 +0200 Subject: [PATCH 4/6] Update Porter.py --- sklearn_porter/Porter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn_porter/Porter.py b/sklearn_porter/Porter.py index 2d78f3c3..0305837b 100644 --- a/sklearn_porter/Porter.py +++ b/sklearn_porter/Porter.py @@ -381,8 +381,8 @@ def predict(self, X, class_name=None, method_name=None, tnp_dir='tmp', tnp_dir = './' + tnp_dir exec_cmd = [os.path.join(os.path.abspath(tnp_dir), exec_cmd[0])] cmds = [exec_cmd + [str(f).strip() for f in feat] for feat in X] - max_threads = 8 - # using threading will increase speed 8-fold + max_threads = joblib.cpu_count() + # using threading will increase speed n-fold, depending on CPUs preds = Parallel(n_jobs=max_threads, backend='threading') \ (delayed(subprocess.check_output)(cmd, cwd=tnp_dir) for cmd in cmds) pred_y = np.array([int(pred) for pred in preds], dtype=int) From fed15592b98ab2719df3536f3daac123d67fa03f Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Thu, 16 May 2019 09:39:06 +0200 Subject: [PATCH 5/6] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 94bcceb9..810e0248 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ six scikit-learn>=0.14.1 +joblib From 19bb4984eaa3bbcabb08e99b853ae522b3b78e3b Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Tue, 25 Jun 2019 14:30:41 +0200 Subject: [PATCH 6/6] update dependency/import --- sklearn_porter/Porter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn_porter/Porter.py b/sklearn_porter/Porter.py index 0305837b..c3cd004d 100644 --- a/sklearn_porter/Porter.py +++ b/sklearn_porter/Porter.py @@ -7,7 +7,7 @@ import numpy as np -from joblib import Parallel, delayed +from joblib import Parallel, delayed, cpu_count from sklearn.metrics import accuracy_score from sklearn.tree.tree import DecisionTreeClassifier @@ -381,7 +381,7 @@ def predict(self, X, class_name=None, method_name=None, tnp_dir='tmp', tnp_dir = './' + tnp_dir exec_cmd = [os.path.join(os.path.abspath(tnp_dir), exec_cmd[0])] cmds = [exec_cmd + [str(f).strip() for f in feat] for feat in X] - max_threads = joblib.cpu_count() + max_threads = cpu_count() # using threading will increase speed n-fold, depending on CPUs preds = Parallel(n_jobs=max_threads, backend='threading') \ (delayed(subprocess.check_output)(cmd, cwd=tnp_dir) for cmd in cmds)