From fdd710aded2a8ee7ae8cf8f3c72a1d915f1e06af Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 9 Jun 2021 20:04:40 +0800 Subject: [PATCH] memory benchmark: support auto-detect CPU L3 cache When writing to a small memory block, the CPU only writes to the L3 cache, so using a memory block size larger than the L3 cache size should give a better measurement. Allow specifying --memory-block-size=0, in which case sysbench auto-detects the CPU L3 cache size and rounds it up to a power of 2 to use as the block size. For example: Originally, running this command on my PC gave a result of 47634.81 MiB/sec # sysbench memory --memory-scope=local --threads=12 run In fact, the real performance is about 15G/s, so the test result deviates by about 300%. A test case should be added to test_memory.t, but the GitHub CI failed. I tried to reproduce the failure on an ARM server, but it works fine there. Maybe this test case could be added in the future. $ sysbench memory --memory-scope=local --memory-oper=write --memory-total-size=1G --memory-block-size=0 --events=1 --time=0 --threads=2 run sysbench *.* * (glob) Running the test with following options: Number of threads: 2 Initializing random number generator from current time Running memory speed test with the following options: block size: * (glob) total size: 1024MiB operation: write scope: local Initializing worker threads... Threads started! 
Total operations: * (* per second) (glob) 1024.00 MiB transferred (* MiB/sec) (glob) Throughput: events/s (eps): * (glob) time elapsed: *s (glob) total number of events: * (glob) Latency (ms): min: *.* (glob) avg: *.* (glob) max: *.* (glob) 95th percentile: *.* (glob) sum: *.* (glob) Threads fairness: events (avg/stddev): */* (glob) execution time (avg/stddev): */* (glob) $ sysbench $args cleanup sysbench *.* * (glob) Signed-off-by: zhenwei pi --- src/tests/memory/sb_memory.c | 72 +++++++++++++++++++++++++++++++++--- tests/t/test_memory.t | 18 ++------- 2 files changed, 69 insertions(+), 21 deletions(-) diff --git a/src/tests/memory/sb_memory.c b/src/tests/memory/sb_memory.c index 27ecae843..e3c23fcf6 100644 --- a/src/tests/memory/sb_memory.c +++ b/src/tests/memory/sb_memory.c @@ -31,6 +31,19 @@ # include #endif +#ifdef HAVE_UNISTD_H +# include +# include +#endif + +#ifdef HAVE_SYS_STAT_H +# include +#endif + +#ifdef HAVE_FCNTL_H +# include +#endif + #include #define LARGE_PAGE_SIZE (4UL * 1024 * 1024) @@ -38,7 +51,8 @@ /* Memory test arguments */ static sb_arg_t memory_args[] = { - SB_OPT("memory-block-size", "size of memory block for test", "1K", SIZE), + /* A typical size of a morden CPU, Ex Intel(R) Xeon(R) Platinum 8260 has 36608K */ + SB_OPT("memory-block-size", "size of memory block for test. 
If 0, auto-detect CPU L3 cache and apply", "65536K", SIZE), SB_OPT("memory-total-size", "total size of data to transfer", "100G", SIZE), SB_OPT("memory-scope", "memory access scope {global,local}", "global", STRING), @@ -107,6 +121,35 @@ int register_test_memory(sb_list_t *tests) return 0; } +static size_t memory_detect_l3_size(void) +{ + int file; + char *l3cache_path = "/sys/devices/system/cpu/cpu0/cache/index3/size"; + char buf[16] = {0}; + size_t buflen; + size_t sizekb; + size_t alignkb = 1; + + file = open(l3cache_path, O_RDONLY, 0); + if (file < 0) + return -1; + + if (read(file, buf, sizeof(buf)) > 0) { + buflen = strlen(buf); + /* try to strip last '\n' */ + if (buf[buflen - 1] == '\n') + buf[buflen - 1] = '\0'; + + /* to make sure memory block size is larger than L3 cache size */ + sizekb = atol(buf); + while (alignkb < sizekb) + alignkb <<= 1; + } + + close(file); + + return alignkb * 1024; +} int memory_init(void) { @@ -115,12 +158,29 @@ int memory_init(void) size_t *buffer; memory_block_size = sb_get_value_size("memory-block-size"); - if (memory_block_size < SIZEOF_SIZE_T || - /* Must be a power of 2 */ - (memory_block_size & (memory_block_size - 1)) != 0) + if (memory_block_size && (memory_block_size < SIZEOF_SIZE_T)) + { + log_text(LOG_FATAL, "Invalid value for memory-block-size: %s, " + "should not less than %d, or specify 0 to auto-detect CPU L3 cache size", + sb_get_value_string("memory-block-size"), SIZEOF_SIZE_T); + return 1; + } + + if (!memory_block_size) + { + /* auto detect L3 cache size */ + memory_block_size = memory_detect_l3_size(); + if (memory_block_size < 0) + { + log_text(LOG_FATAL, "Auto-detect memory-block-size failed"); + return 1; + } + } + + /* Must be a power of 2 */ + if ((memory_block_size & (memory_block_size - 1)) != 0) { - log_text(LOG_FATAL, "Invalid value for memory-block-size: %s", - sb_get_value_string("memory-block-size")); + log_text(LOG_FATAL, "Invalid value for memory-block-size: %ld, should be a power of 2", 
memory_block_size); return 1; } diff --git a/tests/t/test_memory.t b/tests/t/test_memory.t index b2deb8e46..cc9973b84 100644 --- a/tests/t/test_memory.t +++ b/tests/t/test_memory.t @@ -19,7 +19,7 @@ help' only on Linux. sysbench * (glob) memory options: - --memory-block-size=SIZE size of memory block for test [1K] + --memory-block-size=SIZE size of memory block for test. If 0, auto-detect CPU L3 cache and apply [65536K] --memory-total-size=SIZE total size of data to transfer [100G] --memory-scope=STRING memory access scope {global,local} [global] --memory-oper=STRING type of memory operations {read, write, none} [write] @@ -31,28 +31,16 @@ help' only on Linux. 'memory' test does not implement the 'prepare' command. [1] - $ sysbench $args --memory-block-size=-1 run - sysbench * (glob) - - FATAL: Invalid value for memory-block-size: -1 - [1] - - $ sysbench $args --memory-block-size=0 run - sysbench * (glob) - - FATAL: Invalid value for memory-block-size: 0 - [1] - $ sysbench $args --memory-block-size=3 run sysbench * (glob) - FATAL: Invalid value for memory-block-size: 3 + FATAL: Invalid value for memory-block-size: 3, should not less than 8, or specify 0 to auto-detect CPU L3 cache size [1] $ sysbench $args --memory-block-size=9 run sysbench * (glob) - FATAL: Invalid value for memory-block-size: 9 + FATAL: Invalid value for memory-block-size: 9, should be a power of 2 [1] ########################################################################