forked from IntelPython/scikit-learn_bench
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Makefile
267 lines (228 loc) · 9.92 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# ---------------------------------------------------------------------------
# Problem sizes, written as "<samples>x<features>".
# ---------------------------------------------------------------------------
DISTANCES_SIZE = 1000x15000
REGRESSION_SIZE = 1000000x50
KMEANS_SAMPLES = 1000000
KMEANS_FEATURES = 50
KMEANS_SIZE = $(KMEANS_SAMPLES)x$(KMEANS_FEATURES)
SVM_SAMPLES = 100000
SVM_FEATURES = 100
SVM_SIZE = $(SVM_SAMPLES)x$(SVM_FEATURES)
LOGREG_SAMPLES = 100000
LOGREG_FEATURES = 100
LOGREG_SIZE = $(LOGREG_SAMPLES)x$(LOGREG_FEATURES)
DFCLF_SAMPLES = 10000
DFCLF_FEATURES = 100
DFCLF_SIZE = $(DFCLF_SAMPLES)x$(DFCLF_FEATURES)
DFREG_SAMPLES = 10000
DFREG_FEATURES = 100
DFREG_SIZE = $(DFREG_SAMPLES)x$(DFREG_FEATURES)
ITERATIONS = 10

# Bookkeeping options.
# BUG FIX: these were recursively assigned (=), so $(shell ...) re-ran on
# every expansion and each benchmark invocation could record a different
# batch timestamp / hostname. Simple assignment (:=) captures the value
# exactly once, at parse time.
BATCH := $(shell date -Iseconds)
HOST := $(shell hostname)

# Benchmark output is piped through "tee"; without pipefail the pipeline's
# exit status would be tee's (success) even when the benchmark itself fails.
# With pipefail a failed benchmark makes the recipe — and make — fail.
SHELL = bash -o pipefail

# Other options.
NUM_THREADS = -1
# NOTE(review): 0 presumably selects the library's default thread count for
# the SVM/logreg/forest benchmarks — confirm against the benchmark scripts.
SVM_NUM_THREADS = 0
LOGREG_NUM_THREADS = $(SVM_NUM_THREADS)
DFCLF_NUM_THREADS = $(SVM_NUM_THREADS)
DFREG_NUM_THREADS = $(SVM_NUM_THREADS)
# Data multiplier forwarded to the native kmeans benchmark.
MULTIPLIER = 100
DATA_DIR = data/
# Path of the generated kmeans dataset (also the DATA_kmeans prerequisite).
DATA_kmeans = data/clustering/kmeans_$(KMEANS_SIZE).npy
# Arguments shared by every Python benchmark invocation.
COMMON_ARGS = --batch '$(BATCH)' --arch '$(HOST)' \
	--num-threads '$(NUM_THREADS)' --header
# Define which benchmarks to run.
# The 2/5 suffixes pick the two-class vs. the five-class ("multi") dataset
# generated below. NOTE(review): pca_daal is absent from SKLEARN_BENCHMARKS —
# presumably the daal solver is unavailable in plain scikit-learn; confirm
# against the sklearn/ scripts.
NATIVE_BENCHMARKS = distances ridge linear kmeans svm2 svm5 \
	logreg2 logreg5 dfclf2 dfclf5 dfreg pca_daal pca_full
SKLEARN_BENCHMARKS = distances ridge linear kmeans svm2 svm5 \
	logreg2 logreg5 dfclf2 dfclf5 dfreg pca_full
DAAL4PY_BENCHMARKS = distances ridge linear kmeans svm2 svm5 \
	logreg2 logreg5 dfclf2 dfclf5 dfreg pca_daal pca_full
# Define native benchmark binary names.
# Maps each benchmark id to the binary under native/bin/; several benchmark
# ids (e.g. svm2/svm5) share one binary and differ only in their arguments.
NATIVE_distances = distances
NATIVE_ridge = ridge
NATIVE_linear = linear
NATIVE_kmeans = kmeans
NATIVE_svm2 = svm
NATIVE_svm5 = svm
NATIVE_logreg2 = log_reg_lbfgs
NATIVE_logreg5 = log_reg_lbfgs
NATIVE_dfclf2 = decision_forest_clsf
NATIVE_dfclf5 = decision_forest_clsf
NATIVE_dfreg = decision_forest_regr
NATIVE_pca_daal = pca
NATIVE_pca_full = pca
# Define arguments for native benchmarks.
# Synthetic-size benchmarks pass --size; dataset-driven ones point at the
# .npy files generated by the rules at the bottom of this file.
ARGS_NATIVE_distances = --num-threads "$(NUM_THREADS)" \
	--size "$(DISTANCES_SIZE)" --header
ARGS_NATIVE_ridge = --num-threads "$(NUM_THREADS)" \
	--size "$(REGRESSION_SIZE)" --header
ARGS_NATIVE_linear = --num-threads "$(NUM_THREADS)" \
	--size "$(REGRESSION_SIZE)" --header
ARGS_NATIVE_pca_daal = --num-threads "$(NUM_THREADS)" --header \
	--size "$(REGRESSION_SIZE)" --svd-solver daal
ARGS_NATIVE_pca_full = --num-threads "$(NUM_THREADS)" --header \
	--size "$(REGRESSION_SIZE)" --svd-solver full
# kmeans reads both the samples file (--filex) and the initial centroids
# file (--filei) written by make_datasets.py.
ARGS_NATIVE_kmeans = --num-threads "$(NUM_THREADS)" --header \
	--data-multiplier "$(MULTIPLIER)" \
	--filex data/clustering/kmeans_$(KMEANS_SIZE).npy \
	--filei data/clustering/kmeans_$(KMEANS_SIZE).init.npy
ARGS_NATIVE_svm2 = --fileX data/two/X-$(SVM_SIZE).npy \
	--fileY data/two/y-$(SVM_SIZE).npy \
	--num-threads $(SVM_NUM_THREADS) --header
ARGS_NATIVE_svm5 = --fileX data/multi/X-$(SVM_SIZE).npy \
	--fileY data/multi/y-$(SVM_SIZE).npy \
	--num-threads $(SVM_NUM_THREADS) --header
ARGS_NATIVE_logreg2 = --fileX data/two/X-$(LOGREG_SIZE).npy \
	--fileY data/two/y-$(LOGREG_SIZE).npy \
	--num-threads $(LOGREG_NUM_THREADS) --header
ARGS_NATIVE_logreg5 = --fileX data/multi/X-$(LOGREG_SIZE).npy \
	--fileY data/multi/y-$(LOGREG_SIZE).npy \
	--num-threads $(LOGREG_NUM_THREADS) --header
ARGS_NATIVE_dfclf2 = --fileX data/two/X-$(DFCLF_SIZE).npy \
	--fileY data/two/y-$(DFCLF_SIZE).npy \
	--num-threads $(DFCLF_NUM_THREADS) --header
ARGS_NATIVE_dfclf5 = --fileX data/multi/X-$(DFCLF_SIZE).npy \
	--fileY data/multi/y-$(DFCLF_SIZE).npy \
	--num-threads $(DFCLF_NUM_THREADS) --header
ARGS_NATIVE_dfreg = --fileX data/reg/X-$(DFREG_SIZE).npy \
	--fileY data/reg/y-$(DFREG_SIZE).npy \
	--num-threads $(DFREG_NUM_THREADS) --header
# Map benchmark ids to scripts under sklearn/ (run as sklearn/<name>.py).
SKLEARN_distances = distances
SKLEARN_ridge = ridge
SKLEARN_linear = linear
SKLEARN_pca_full = pca
SKLEARN_pca_daal = pca
SKLEARN_kmeans = kmeans
SKLEARN_svm2 = svm
SKLEARN_svm5 = svm
SKLEARN_logreg2 = log_reg
SKLEARN_logreg5 = log_reg
SKLEARN_dfclf2 = df_clsf
SKLEARN_dfclf5 = df_clsf
SKLEARN_dfreg = df_regr
# Per-benchmark arguments for the sklearn scripts; $(COMMON_ARGS) supplies
# --batch/--arch/--num-threads/--header at the call site.
ARGS_SKLEARN_distances = --size "$(DISTANCES_SIZE)"
ARGS_SKLEARN_ridge = --size "$(REGRESSION_SIZE)"
ARGS_SKLEARN_linear = --size "$(REGRESSION_SIZE)"
ARGS_SKLEARN_pca_daal = --size "$(REGRESSION_SIZE)" --svd-solver daal
ARGS_SKLEARN_pca_full = --size "$(REGRESSION_SIZE)" --svd-solver full
ARGS_SKLEARN_kmeans = --file-X-train data/clustering/kmeans_$(KMEANS_SIZE).npy \
	--filei data/clustering/kmeans_$(KMEANS_SIZE).init.npy
ARGS_SKLEARN_svm2 = --file-X-train data/two/X-$(SVM_SIZE).npy \
	--file-y-train data/two/y-$(SVM_SIZE).npy
ARGS_SKLEARN_svm5 = --file-X-train data/multi/X-$(SVM_SIZE).npy \
	--file-y-train data/multi/y-$(SVM_SIZE).npy
ARGS_SKLEARN_logreg2 = --file-X-train data/two/X-$(LOGREG_SIZE).npy \
	--file-y-train data/two/y-$(LOGREG_SIZE).npy
ARGS_SKLEARN_logreg5 = --file-X-train data/multi/X-$(LOGREG_SIZE).npy \
	--file-y-train data/multi/y-$(LOGREG_SIZE).npy
ARGS_SKLEARN_dfclf2 = --file-X-train data/two/X-$(DFCLF_SIZE).npy \
	--file-y-train data/two/y-$(DFCLF_SIZE).npy
ARGS_SKLEARN_dfclf5 = --file-X-train data/multi/X-$(DFCLF_SIZE).npy \
	--file-y-train data/multi/y-$(DFCLF_SIZE).npy
ARGS_SKLEARN_dfreg = --file-X-train data/reg/X-$(DFREG_SIZE).npy \
	--file-y-train data/reg/y-$(DFREG_SIZE).npy
# Map benchmark ids to scripts under daal4py/ (run as daal4py/<name>.py).
# Mirrors the SKLEARN_* mapping above; keep the two sections in sync.
DAAL4PY_distances = distances
DAAL4PY_ridge = ridge
DAAL4PY_linear = linear
DAAL4PY_pca_full = pca
DAAL4PY_pca_daal = pca
DAAL4PY_kmeans = kmeans
DAAL4PY_svm2 = svm
DAAL4PY_svm5 = svm
DAAL4PY_logreg2 = log_reg
DAAL4PY_logreg5 = log_reg
DAAL4PY_dfclf2 = df_clsf
DAAL4PY_dfclf5 = df_clsf
DAAL4PY_dfreg = df_regr
# Per-benchmark arguments for the daal4py scripts (same shape as sklearn's).
ARGS_DAAL4PY_distances = --size "$(DISTANCES_SIZE)"
ARGS_DAAL4PY_ridge = --size "$(REGRESSION_SIZE)"
ARGS_DAAL4PY_linear = --size "$(REGRESSION_SIZE)"
ARGS_DAAL4PY_pca_daal = --size "$(REGRESSION_SIZE)" --svd-solver daal
ARGS_DAAL4PY_pca_full = --size "$(REGRESSION_SIZE)" --svd-solver full
ARGS_DAAL4PY_kmeans = --file-X-train data/clustering/kmeans_$(KMEANS_SIZE).npy \
	--filei data/clustering/kmeans_$(KMEANS_SIZE).init.npy
ARGS_DAAL4PY_svm2 = --file-X-train data/two/X-$(SVM_SIZE).npy \
	--file-y-train data/two/y-$(SVM_SIZE).npy
ARGS_DAAL4PY_svm5 = --file-X-train data/multi/X-$(SVM_SIZE).npy \
	--file-y-train data/multi/y-$(SVM_SIZE).npy
ARGS_DAAL4PY_logreg2 = --file-X-train data/two/X-$(LOGREG_SIZE).npy \
	--file-y-train data/two/y-$(LOGREG_SIZE).npy
ARGS_DAAL4PY_logreg5 = --file-X-train data/multi/X-$(LOGREG_SIZE).npy \
	--file-y-train data/multi/y-$(LOGREG_SIZE).npy
ARGS_DAAL4PY_dfclf2 = --file-X-train data/two/X-$(DFCLF_SIZE).npy \
	--file-y-train data/two/y-$(DFCLF_SIZE).npy
ARGS_DAAL4PY_dfclf5 = --file-X-train data/multi/X-$(DFCLF_SIZE).npy \
	--file-y-train data/multi/y-$(DFCLF_SIZE).npy
ARGS_DAAL4PY_dfreg = --file-X-train data/reg/X-$(DFREG_SIZE).npy \
	--file-y-train data/reg/y-$(DFREG_SIZE).npy
# Literal comma, usable inside function calls where "," is an argument
# separator. NOTE(review): unused in this chunk — presumably referenced
# elsewhere or kept for convenience.
comma = ,
# Inside a conda environment, make sure its shared libraries are found by
# the native benchmark binaries at runtime.
ifneq ($(CONDA_PREFIX),)
LD_LIBRARY_PATH := $(LD_LIBRARY_PATH):$(CONDA_PREFIX)/lib
export LD_LIBRARY_PATH
endif
# Forward the Intel MPI root to sub-makes and benchmark processes.
export I_MPI_ROOT
# Default goal: run every native and Python benchmark suite.
all: native python

# All Python-based benchmark suites.
python: sklearn daal4py

# Native benchmark binaries are built by the submodule's own makefile.
native/bin/%: native/%.cpp
	git submodule init && git submodule update
	$(MAKE) -C native

# One output file per benchmark run. Datasets and output directories are
# order-only prerequisites (after "|"): they must exist, but their timestamps
# must not force benchmarks to re-run.
# BUG FIX: the rebuild fallback invoked literal "make -C native"; it must be
# $(MAKE) so that -j/-n and the jobserver propagate to the sub-make.
output/native/%.out: | DATA_% output/native/
	[ -f native/bin/$(NATIVE_$*) ] || $(MAKE) -C native
	native/bin/$(NATIVE_$*) $(ARGS_NATIVE_$*) | tee $@

output/sklearn/%.out: | DATA_% output/sklearn/
	python sklearn/$(SKLEARN_$*).py $(COMMON_ARGS) $(ARGS_SKLEARN_$*) | tee $@

output/daal4py/%.out: | DATA_% output/daal4py/
	python daal4py/$(DAAL4PY_$*).py $(COMMON_ARGS) $(ARGS_DAAL4PY_$*) | tee $@

# Output directories (order-only prerequisites of the rules above).
output/%/:
	mkdir -p $@
# Aggregate targets: one .out file per benchmark in each suite.
native: $(addsuffix .out,$(addprefix output/native/,$(NATIVE_BENCHMARKS))) data
sklearn: $(addsuffix .out,$(addprefix output/sklearn/,$(SKLEARN_BENCHMARKS))) data
daal4py: $(addsuffix .out,$(addprefix output/daal4py/,$(DAAL4PY_BENCHMARKS))) data

# Generate every dataset.
# BUG FIX: this previously depended on the undefined variable $(KMEANS_DATA)
# (which expanded to nothing) and on phony names with no rules (svm_data,
# logreg_data, df_clf_data), so "make data" generated no files at all.
# Depend on the real per-benchmark DATA_* targets defined below instead.
data: DATA_kmeans DATA_svm2 DATA_svm5 DATA_logreg2 DATA_logreg5 \
	DATA_dfclf2 DATA_dfclf5 DATA_dfreg

# Map each benchmark id to the dataset file(s) it needs; make_datasets.py
# writes the matching y-/init files alongside the X file named here.
DATA_kmeans: data/clustering/kmeans_$(KMEANS_SIZE).npy
DATA_svm2: data/two/X-$(SVM_SIZE).npy
DATA_svm5: data/multi/X-$(SVM_SIZE).npy
DATA_logreg2: data/two/X-$(LOGREG_SIZE).npy
DATA_logreg5: data/multi/X-$(LOGREG_SIZE).npy
DATA_dfclf2: data/two/X-$(DFCLF_SIZE).npy
DATA_dfclf5: data/multi/X-$(DFCLF_SIZE).npy
DATA_dfreg: data/reg/X-$(DFREG_SIZE).npy
# Benchmarks with no dataset (distances, ridge, ...) match this empty rule.
DATA_%: ;
# Dataset generation. Targets are spelled with the $(*_SIZE) variables so
# they match the prerequisites above character-for-character.
data/clustering/kmeans_$(KMEANS_SIZE).npy: | data/clustering/
	python make_datasets.py -f $(KMEANS_FEATURES) -s $(KMEANS_SAMPLES) \
		kmeans -c 10 -x $(basename $@) -i $(basename $@).init \
		-t $(basename $@).tol
data/two/X-$(SVM_SIZE).npy: | data/two/
	python make_datasets.py -f $(SVM_FEATURES) -s $(SVM_SAMPLES) \
		classification -c 2 -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
data/multi/X-$(SVM_SIZE).npy: | data/multi/
	python make_datasets.py -f $(SVM_FEATURES) -s $(SVM_SAMPLES) \
		classification -c 5 -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
data/two/X-$(LOGREG_SIZE).npy: | data/two/
	python make_datasets.py -f $(LOGREG_FEATURES) -s $(LOGREG_SAMPLES) \
		classification -c 2 -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
data/multi/X-$(LOGREG_SIZE).npy: | data/multi/
	python make_datasets.py -f $(LOGREG_FEATURES) -s $(LOGREG_SAMPLES) \
		classification -c 5 -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
data/two/X-$(DFCLF_SIZE).npy: | data/two/
	python make_datasets.py -f $(DFCLF_FEATURES) -s $(DFCLF_SAMPLES) \
		classification -c 2 -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
data/multi/X-$(DFCLF_SIZE).npy: | data/multi/
	python make_datasets.py -f $(DFCLF_FEATURES) -s $(DFCLF_SAMPLES) \
		classification -c 5 -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
# BUG FIX: this rule previously used DFCLF_SAMPLES/DFCLF_FEATURES; it only
# worked because they happened to equal the DFREG values, and changing
# DFREG_SIZE would have silently broken the dfreg benchmark's prerequisite.
data/reg/X-$(DFREG_SIZE).npy: | data/reg/
	python make_datasets.py -f $(DFREG_FEATURES) -s $(DFREG_SAMPLES) \
		regression -x $@ -y $(dir $@)/$(subst X-,y-,$(notdir $@))
# Dataset directories (order-only prerequisites of the rules above).
data/%/:
	mkdir -p $@
# Remove everything the build created: native binaries (via the submodule's
# own clean), generated datasets, and benchmark outputs.
clean:
	$(MAKE) -C native clean
	rm -rf data output

# Keep the output directories even when make considers them intermediates.
.PRECIOUS: output/sklearn/ output/native/ output/daal4py/
# Command-style targets that never correspond to files.
.PHONY: native python sklearn daal4py all clean native_data data kmeans_data svm_data logreg_data df_clf_data
# Delete a partially-written target when its recipe fails, so a broken
# benchmark output never looks up to date.
.DELETE_ON_ERROR: