Skip to content

Notes on the MPI benchmarks of the wavelet operator

David Pérez-Suárez edited this page Nov 9, 2021 · 2 revisions

First MPI run

Using environment: intel
Running on 16 slots:
    1 MPI tasks
   16 threads per task
TMPDIR=/tmpdir/job/4099872.undefined

Contents of machinefile:
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027
node-u07a-027

--------------------------------------------------------------------------------------------------------------
Benchmark                                                                       Time           CPU Iterations
--------------------------------------------------------------------------------------------------------------
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_mean               0 ms          0 ms      28387
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_median             0 ms          0 ms      28387
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_stddev             0 ms          0 ms      28387
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_mean               0 ms          0 ms      28441
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_median             0 ms          0 ms      28441
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_stddev             0 ms          0 ms      28441
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_mean               0 ms          0 ms      27908
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_median             0 ms          0 ms      27908
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_stddev             0 ms          0 ms      27908
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_mean               0 ms          0 ms      28496
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_median             0 ms          0 ms      28496
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_stddev             0 ms          0 ms      28496
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_mean              0 ms          0 ms      27886
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_median            0 ms          0 ms      27886
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_stddev            0 ms          0 ms      27886
wavelet_operator_constructor_mpi/32768/repeats:10/manual_time_mean              0 ms          0 ms      27909
wavelet_operator_constructor_mpi/32768/repeats:10/manual_time_median            0 ms          0 ms      27909
wavelet_operator_constructor_mpi/32768/repeats:10/manual_time_stddev            0 ms          0 ms      27909
wavelet_operator_constructor_mpi/65536/repeats:10/manual_time_mean              0 ms          0 ms      28476
wavelet_operator_constructor_mpi/65536/repeats:10/manual_time_median            0 ms          0 ms      28476
wavelet_operator_constructor_mpi/65536/repeats:10/manual_time_stddev            0 ms          0 ms      28476
wavelet_operator_constructor_mpi/131072/repeats:10/manual_time_mean             0 ms          0 ms      28481
wavelet_operator_constructor_mpi/131072/repeats:10/manual_time_median           0 ms          0 ms      28481
wavelet_operator_constructor_mpi/131072/repeats:10/manual_time_stddev           0 ms          0 ms      28481
wavelet_operator_constructor_mpi/262144/repeats:10/manual_time_mean             0 ms          0 ms      28521
wavelet_operator_constructor_mpi/262144/repeats:10/manual_time_median           0 ms          0 ms      28521
wavelet_operator_constructor_mpi/262144/repeats:10/manual_time_stddev           0 ms          0 ms      28521
wavelet_operator_constructor_mpi/524288/repeats:10/manual_time_mean             0 ms          0 ms      29223
wavelet_operator_constructor_mpi/524288/repeats:10/manual_time_median           0 ms          0 ms      29223
wavelet_operator_constructor_mpi/524288/repeats:10/manual_time_stddev           0 ms          0 ms      29223
wavelet_operator_constructor_mpi/1048576/repeats:10/manual_time_mean            0 ms          0 ms      28508
wavelet_operator_constructor_mpi/1048576/repeats:10/manual_time_median          0 ms          0 ms      28508
wavelet_operator_constructor_mpi/1048576/repeats:10/manual_time_stddev          0 ms          0 ms      28508

As you can see, this run does not actually use MPI: the job was launched with only one MPI task (a single process with 16 threads).

Second MPI run

Using environment: intel
Running on 16 slots:
    1 MPI tasks
   16 threads per task
TMPDIR=/tmpdir/job/4122925.undefined

Contents of machinefile:
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024
node-u07a-024

------------------------------------------------------------------------------------------------------------
Benchmark                                                                     Time           CPU Iterations
------------------------------------------------------------------------------------------------------------
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_mean             0 ms          0 ms      29940
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_median           0 ms          0 ms      29940
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_stddev           0 ms          0 ms      29940
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_mean             0 ms          0 ms      23240
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_median           0 ms          0 ms      23240
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_stddev           0 ms          0 ms      23240
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_mean             0 ms          0 ms      23381
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_median           0 ms          0 ms      23381
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_stddev           0 ms          0 ms      23381
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_mean             0 ms          0 ms      23285
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_median           0 ms          0 ms      23285
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_stddev           0 ms          0 ms      23285
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_mean            0 ms          0 ms      23307
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_median          0 ms          0 ms      23307
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_stddev          0 ms          0 ms      23307
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_mean            390 ms        362 ms          2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_median          390 ms        361 ms          2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_stddev            1 ms          4 ms          2
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_mean           1705 ms        925 ms          1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_median         1696 ms        920 ms          1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_stddev           42 ms         42 ms          1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_mean           8903 ms       2680 ms          1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_median         8989 ms       2674 ms          1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_stddev          241 ms         44 ms          1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_mean          33927 ms       8302 ms          1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_median        33980 ms       7999 ms          1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_stddev          965 ms        806 ms          1

Same conclusion: this run was also launched with a single MPI task, so it is not a real MPI benchmark.

Third MPI run

Using environment: intel
Running on 16 slots:
    1 MPI tasks
   16 threads per task
TMPDIR=/tmpdir/job/4149120.undefined

Contents of machinefile:
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029
node-u07a-029

------------------------------------------------------------------------------------------------------------
Benchmark                                                                     Time           CPU Iterations
------------------------------------------------------------------------------------------------------------
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_mean             0 ms          0 ms      29886
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_median           0 ms          0 ms      29886
wavelet_operator_constructor_mpi/1024/repeats:10/manual_time_stddev           0 ms          0 ms      29886
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_mean             0 ms          0 ms      29959
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_median           0 ms          0 ms      29959
wavelet_operator_constructor_mpi/2048/repeats:10/manual_time_stddev           0 ms          0 ms      29959
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_mean             0 ms          0 ms      29947
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_median           0 ms          0 ms      29947
wavelet_operator_constructor_mpi/4096/repeats:10/manual_time_stddev           0 ms          0 ms      29947
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_mean             0 ms          0 ms      29911
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_median           0 ms          0 ms      29911
wavelet_operator_constructor_mpi/8192/repeats:10/manual_time_stddev           0 ms          0 ms      29911
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_mean            0 ms          0 ms      29937
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_median          0 ms          0 ms      29937
wavelet_operator_constructor_mpi/16384/repeats:10/manual_time_stddev          0 ms          0 ms      29937
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_mean            367 ms        364 ms          2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_median          365 ms        362 ms          2
WaveletOperatorMPIFixture/Apply/1024/repeats:10/manual_time_stddev            6 ms          5 ms          2
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_mean           1595 ms        890 ms          1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_median         1576 ms        876 ms          1
WaveletOperatorMPIFixture/Apply/2048/repeats:10/manual_time_stddev           83 ms         89 ms          1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_mean           6703 ms       2195 ms          1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_median         6579 ms       2174 ms          1
WaveletOperatorMPIFixture/Apply/4096/repeats:10/manual_time_stddev          424 ms         86 ms          1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_mean          78217 ms      47922 ms          1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_median        70461 ms      40579 ms          1
WaveletOperatorMPIFixture/Apply/8192/repeats:10/manual_time_stddev        20095 ms      19103 ms          1