-
Notifications
You must be signed in to change notification settings - Fork 13
Notes on the MPI benchmarks of the wavelet operator
David Pérez-Suárez edited this page Nov 9, 2021
·
2 revisions
Using environment: intel
Running on 16 slots:
1 MPI tasks
16 threads per task
TMPDIR=/tmpdir/job/4099872.undefined
Contents of machinefile:
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
--------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------------------------------------------------
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_mean 0 ms 0 ms 28387
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_median 0 ms 0 ms 28387
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_stddev 0 ms 0 ms 28387
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_mean 0 ms 0 ms 28441
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_median 0 ms 0 ms 28441
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_stddev 0 ms 0 ms 28441
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_mean 0 ms 0 ms 27908
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_median 0 ms 0 ms 27908
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_stddev 0 ms 0 ms 27908
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_mean 0 ms 0 ms 28496
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_median 0 ms 0 ms 28496
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_stddev 0 ms 0 ms 28496
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_mean 0 ms 0 ms 27886
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_median 0 ms 0 ms 27886
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_stddev 0 ms 0 ms 27886
wavelet_operator_constructor_mpi/32768/repeats:10/manual_time_mean 0 ms 0 ms 27909
wavelet_operator_constructor_mpi/32768/repeats:10/manual_time_median 0 ms 0 ms 27909
wavelet_operator_constructor_mpi/32768/repeats:10/manual_time_stddev 0 ms 0 ms 27909
wavelet_operator_constructor_mpi/65536/repeats:10/manual_time_mean 0 ms 0 ms 28476
wavelet_operator_constructor_mpi/65536/repeats:10/manual_time_median 0 ms 0 ms 28476
wavelet_operator_constructor_mpi/65536/repeats:10/manual_time_stddev 0 ms 0 ms 28476
wavelet_operator_constructor_mpi/131072/repeats:10/manual_time_mean 0 ms 0 ms 28481
wavelet_operator_constructor_mpi/131072/repeats:10/manual_time_median 0 ms 0 ms 28481
wavelet_operator_constructor_mpi/131072/repeats:10/manual_time_stddev 0 ms 0 ms 28481
wavelet_operator_constructor_mpi/262144/repeats:10/manual_time_mean 0 ms 0 ms 28521
wavelet_operator_constructor_mpi/262144/repeats:10/manual_time_median 0 ms 0 ms 28521
wavelet_operator_constructor_mpi/262144/repeats:10/manual_time_stddev 0 ms 0 ms 28521
wavelet_operator_constructor_mpi/524288/repeats:10/manual_time_mean 0 ms 0 ms 29223
wavelet_operator_constructor_mpi/524288/repeats:10/manual_time_median 0 ms 0 ms 29223
wavelet_operator_constructor_mpi/524288/repeats:10/manual_time_stddev 0 ms 0 ms 29223
wavelet_operator_constructor_mpi/1048576/repeats:10/manual_time_mean 0 ms 0 ms 28508
wavelet_operator_constructor_mpi/1048576/repeats:10/manual_time_median 0 ms 0 ms 28508
wavelet_operator_constructor_mpi/1048576/repeats:10/manual_time_stddev 0 ms 0 ms 28508

As you can see, this is not really an MPI run: the job header reports "1 MPI tasks", so only one process was used.
Using environment: intel
Running on 16 slots:
1 MPI tasks
16 threads per task
TMPDIR=/tmpdir/job/4122925.undefined
Contents of machinefile:
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------------------------------
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_mean 0 ms 0 ms 29940
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_median 0 ms 0 ms 29940
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_stddev 0 ms 0 ms 29940
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_mean 0 ms 0 ms 23240
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_median 0 ms 0 ms 23240
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_stddev 0 ms 0 ms 23240
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_mean 0 ms 0 ms 23381
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_median 0 ms 0 ms 23381
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_stddev 0 ms 0 ms 23381
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_mean 0 ms 0 ms 23285
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_median 0 ms 0 ms 23285
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_stddev 0 ms 0 ms 23285
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_mean 0 ms 0 ms 23307
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_median 0 ms 0 ms 23307
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_stddev 0 ms 0 ms 23307
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_mean 390 ms 362 ms 2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_median 390 ms 361 ms 2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_stddev 1 ms 4 ms 2
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_mean 1705 ms 925 ms 1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_median 1696 ms 920 ms 1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_stddev 42 ms 42 ms 1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_mean 8903 ms 2680 ms 1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_median 8989 ms 2674 ms 1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_stddev 241 ms 44 ms 1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_mean 33927 ms 8302 ms 1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_median 33980 ms 7999 ms 1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_stddev 965 ms 806 ms 1

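Same conclusion: this run also reports "1 MPI tasks", so again only one process was used and it is not a real MPI benchmark.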
Using environment: intel
Running on 16 slots:
1 MPI tasks
16 threads per task
TMPDIR=/tmpdir/job/4149120.undefined
Contents of machinefile:
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------------------------------------
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_mean 0 ms 0 ms 29886
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_median 0 ms 0 ms 29886
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_stddev 0 ms 0 ms 29886
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_mean 0 ms 0 ms 29959
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_median 0 ms 0 ms 29959
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_stddev 0 ms 0 ms 29959
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_mean 0 ms 0 ms 29947
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_median 0 ms 0 ms 29947
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_stddev 0 ms 0 ms 29947
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_mean 0 ms 0 ms 29911
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_median 0 ms 0 ms 29911
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_stddev 0 ms 0 ms 29911
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_mean 0 ms 0 ms 29937
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_median 0 ms 0 ms 29937
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_stddev 0 ms 0 ms 29937
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_mean 367 ms 364 ms 2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_median 365 ms 362 ms 2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_stddev 6 ms 5 ms 2
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_mean 1595 ms 890 ms 1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_median 1576 ms 876 ms 1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_stddev 83 ms 89 ms 1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_mean 6703 ms 2195 ms 1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_median 6579 ms 2174 ms 1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_stddev 424 ms 86 ms 1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_mean 78217 ms 47922 ms 1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_median 70461 ms 40579 ms 1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_stddev 20095 ms 19103 ms 1
