diff --git a/test/performance/Makefile.am b/test/performance/Makefile.am index 24c1767fe6..ded16bce59 100644 --- a/test/performance/Makefile.am +++ b/test/performance/Makefile.am @@ -61,10 +61,10 @@ endif bin_PROGRAMS = $(EXECUTABLES) $(COMPILE_ONLY) odp_atomic_perf_SOURCES = odp_atomic_perf.c -odp_bench_buffer_SOURCES = odp_bench_buffer.c -odp_bench_misc_SOURCES = odp_bench_misc.c -odp_bench_packet_SOURCES = odp_bench_packet.c -odp_bench_timer_SOURCES = odp_bench_timer.c +odp_bench_buffer_SOURCES = odp_bench_buffer.c bench_common.c bench_common.h +odp_bench_misc_SOURCES = odp_bench_misc.c bench_common.c bench_common.h +odp_bench_packet_SOURCES = odp_bench_packet.c bench_common.c bench_common.h +odp_bench_timer_SOURCES = odp_bench_timer.c bench_common.c bench_common.h odp_cpu_bench_SOURCES = odp_cpu_bench.c odp_crc_SOURCES = odp_crc.c odp_crypto_SOURCES = odp_crypto.c diff --git a/test/performance/bench_common.c b/test/performance/bench_common.c new file mode 100644 index 0000000000..acb70038c2 --- /dev/null +++ b/test/performance/bench_common.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#include +#include + +#include "bench_common.h" + +#include +#include +#include + +void bench_suite_init(bench_suite_t *suite) +{ + memset(suite, 0, sizeof(bench_suite_t)); + + suite->measure_time = true; + + odp_atomic_init_u32(&suite->exit_worker, 0); +} + +void bench_run_indef(bench_info_t *info, odp_atomic_u32_t *exit_thread) +{ + const char *desc; + + desc = info->desc != NULL ? 
info->desc : info->name; + + printf("Running odp_%s test indefinitely\n", desc); + + while (!odp_atomic_load_u32(exit_thread)) { + int ret; + + if (info->init != NULL) + info->init(); + + ret = info->run(); + + if (info->term != NULL) + info->term(); + + if (!ret) + ODPH_ABORT("Benchmark %s failed\n", desc); + } +} + +int bench_run(void *arg) +{ + uint64_t c1, c2; + odp_time_t t1, t2; + bench_suite_t *suite = arg; + const uint64_t repeat_count = suite->repeat_count; + const odp_bool_t meas_time = suite->measure_time; + + printf("\nAverage %s per function call\n", meas_time ? "time (nsec)" : "CPU cycles"); + printf("-------------------------------------------------\n"); + + /* Run each test twice. Results from the first warm-up round are ignored. */ + for (int i = 0; i < 2; i++) { + uint64_t total = 0; + uint64_t round = 1; + + for (int j = 0; j < suite->num_bench; round++) { + int ret; + const char *desc; + const bench_info_t *bench = &suite->bench[j]; + uint64_t max_rounds = suite->rounds; + + if (bench->max_rounds && bench->max_rounds < max_rounds) + max_rounds = bench->max_rounds; + + /* Run selected test indefinitely */ + if (suite->indef_idx) { + if ((j + 1) != suite->indef_idx) { + j++; + continue; + } + bench_run_indef(&suite->bench[j], &suite->exit_worker); + return 0; + } + + desc = bench->desc != NULL ? bench->desc : bench->name; + + /* Skip tests whose optional precondition reports them unsupported */ + if (bench->cond != NULL && !bench->cond()) { + /* No print from warm-up round */ + if (i > 0) + printf("[%02d] odp_%-26s: n/a\n", j + 1, desc); + j++; + round = 0; /* Loop increment makes this round 1 of the next test */ + continue; + } + + if (bench->init != NULL) + bench->init(); + + if (meas_time) + t1 = odp_time_local_strict(); + else + c1 = odp_cpu_cycles(); + + ret = bench->run(); + + if (meas_time) + t2 = odp_time_local_strict(); + else + c2 = odp_cpu_cycles(); + + if (bench->term != NULL) + bench->term(); + + if (!ret) { + ODPH_ERR("Benchmark odp_%s failed\n", desc); + suite->retval = -1; + return -1; + } + + if (meas_time) + total += odp_time_diff_ns(t2, t1); + else + total += odp_cpu_cycles_diff(c2, c1); + + if (round >= max_rounds) { + double result; + + /* Each benchmark runs internally 'repeat_count' times. 
*/ + result = ((double)total) / (max_rounds * repeat_count); + + /* No print or results from warm-up round */ + if (i > 0) { + printf("[%02d] odp_%-26s: %12.2f\n", j + 1, desc, result); + + if (suite->result) + suite->result[j] = result; + } + j++; + total = 0; + round = 1; + } + } + } + printf("\n"); + /* Print dummy result to prevent compiler to optimize it away*/ + if (suite->dummy) + printf("(dummy result: 0x%" PRIx64 ")\n\n", suite->dummy); + + return 0; +} diff --git a/test/performance/bench_common.h b/test/performance/bench_common.h new file mode 100644 index 0000000000..d33bd3b346 --- /dev/null +++ b/test/performance/bench_common.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#ifndef BENCH_COMMON_H +#define BENCH_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include + +/** + * Check benchmark preconditions + * + * @retval !0 test enabled + */ +typedef int (*bench_cond_fn_t)(void); + +/** + * Initialize benchmark resources + */ +typedef void (*bench_init_fn_t)(void); + +/** + * Run benchmark + * + * @retval >0 on success + */ +typedef int (*bench_run_fn_t)(void); + +/** + * Release benchmark resources + */ +typedef void (*bench_term_fn_t)(void); + +/* Benchmark test data */ +typedef struct { + /* Default test name */ + const char *name; + + /* Optional alternate test description */ + const char *desc; + + /* Optional precondition to run test */ + bench_cond_fn_t cond; + + /* Optional test initializer function */ + bench_init_fn_t init; + + /* Test function to run */ + bench_run_fn_t run; + + /* Optional test terminate function */ + bench_term_fn_t term; + + /* Optional test specific limit for rounds (tuning for slow implementations) */ + uint32_t max_rounds; + +} bench_info_t; + +/* Benchmark suite data */ +typedef struct { + /* Array of benchmark functions */ + bench_info_t *bench; + + /* Number of benchmark functions */ + int num_bench; + + /* Optional benchmark index to 
run indefinitely (1...num_bench) */ + int indef_idx; + + /* Suite exit value output */ + int retval; + + /* Measure time vs. CPU cycles */ + odp_bool_t measure_time; + + /* Break worker loop if set to 1 */ + odp_atomic_u32_t exit_worker; + + /* Number of API function calls per test case */ + uint64_t repeat_count; + + /* Number of rounds per test case */ + uint64_t rounds; + + /* Dummy test result output */ + uint64_t dummy; + + /* Optional test result output array */ + double *result; + +} bench_suite_t; + +/** + * Initialize benchmark suite parameters + */ +void bench_suite_init(bench_suite_t *suite); + +/** + * Run selected test indefinitely + */ +void bench_run_indef(bench_info_t *info, odp_atomic_u32_t *exit_thread); + +/** + * Run tests suite and print results + */ +int bench_run(void *arg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test/performance/odp_bench_buffer.c b/test/performance/odp_bench_buffer.c index e0e7c85cdc..4e55d88791 100644 --- a/test/performance/odp_bench_buffer.c +++ b/test/performance/odp_bench_buffer.c @@ -1,5 +1,5 @@ /* Copyright (c) 2017-2018, Linaro Limited - * Copyright (c) 2022, Nokia + * Copyright (c) 2022-2023, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause @@ -8,6 +8,8 @@ #include #include +#include "bench_common.h" + #include #include #include @@ -20,11 +22,11 @@ /** Default pool user area size in bytes */ #define TEST_UAREA_SIZE 8 -/** Number of function calls per test cycle */ +/** Number of API function calls per test case */ #define TEST_REPEAT_COUNT 1000 -/** Default number of test cycles */ -#define TEST_CYCLES 1000 +/** Default number of rounds per test case */ +#define TEST_ROUNDS 1000u /** Maximum burst size for *_multi operations */ #define TEST_MAX_BURST 64 @@ -36,11 +38,12 @@ #define NO_PATH(file_name) (strrchr((file_name), '/') ? 
\ strrchr((file_name), '/') + 1 : (file_name)) -#define BENCH_INFO(run, init, term, name) \ - {#run, run, init, term, name, NULL} +#define BENCH_INFO(run_fn, init_fn, term_fn, alt_name) \ + {.name = #run_fn, .run = run_fn, .init = init_fn, .term = term_fn, .desc = alt_name} -#define BENCH_INFO_COND(run, init, term, name, cond) \ - {#run, run, init, term, name, cond} +#define BENCH_INFO_COND(run_fn, init_fn, term_fn, alt_name, cond_fn) \ + {.name = #run_fn, .run = run_fn, .init = init_fn, .term = term_fn, .desc = alt_name, \ + .cond = cond_fn} /** * Parsed command line arguments @@ -49,65 +52,25 @@ typedef struct { int bench_idx; /** Benchmark index to run indefinitely */ int burst_size; /** Burst size for *_multi operations */ int cache_size; /** Pool cache size */ - int test_cycles; /** Test cycles per tested function */ + uint32_t rounds; /** Rounds per test case */ } appl_args_t; -/** - * Initialize benchmark resources - */ -typedef void (*bench_init_fn_t)(void); - -/** - * Run benchmark - * - * @retval >0 on success - * */ -typedef int (*bench_run_fn_t)(void); - -/** - * Release benchmark resources - */ -typedef void (*bench_term_fn_t)(void); - -/** - * Check benchmark preconditions - * - * @retval !0 test enabled - * */ -typedef int (*bench_cond_fn_t)(void); - -/** - * Benchmark data - */ -typedef struct { - const char *name; - bench_run_fn_t run; - bench_init_fn_t init; - bench_term_fn_t term; - const char *desc; - bench_cond_fn_t cond; -} bench_info_t; - /** * Grouping of all global data */ typedef struct { /** Application (parsed) arguments */ appl_args_t appl; + /** Common benchmark suite data */ + bench_suite_t suite; /** Buffer pool */ odp_pool_t pool; - /** Benchmark functions */ - bench_info_t *bench; - /** Number of benchmark functions */ - int num_bench; /** Buffer size */ uint32_t buf_size; /** Buffer user area size */ uint32_t uarea_size; /** Max flow id */ uint32_t max_flow_id; - /** Break worker loop if set to 1 */ - odp_atomic_u32_t exit_thread; 
/** Array for storing test buffers */ odp_buffer_t buf_tbl[TEST_REPEAT_COUNT * TEST_MAX_BURST]; /** Array for storing test event */ @@ -120,8 +83,6 @@ typedef struct { odp_event_type_t event_type_tbl[TEST_REPEAT_COUNT]; /** Array for storing test event subtypes */ odp_event_subtype_t event_subtype_tbl[TEST_REPEAT_COUNT]; - /** Benchmark run failed */ - uint8_t bench_failed; /** CPU mask as string */ char cpumask_str[ODP_CPUMASK_STR_SIZE]; } args_t; @@ -133,117 +94,7 @@ static void sig_handler(int signo ODP_UNUSED) { if (gbl_args == NULL) return; - odp_atomic_store_u32(&gbl_args->exit_thread, 1); -} - -/** - * Run given benchmark indefinitely - */ -static void run_indef(args_t *args, int idx) -{ - const char *desc; - - desc = args->bench[idx].desc != NULL ? - args->bench[idx].desc : args->bench[idx].name; - - printf("Running odp_%s test indefinitely\n", desc); - - while (!odp_atomic_load_u32(&gbl_args->exit_thread)) { - int ret; - - if (args->bench[idx].init != NULL) - args->bench[idx].init(); - - ret = args->bench[idx].run(); - - if (args->bench[idx].term != NULL) - args->bench[idx].term(); - - if (!ret) - ODPH_ABORT("Benchmark %s failed\n", desc); - } -} - -static int run_benchmarks(void *arg) -{ - int i, j, k; - args_t *args = arg; - - printf("\nAverage CPU cycles per function call\n" - "---------------------------------------------\n"); - - /* Run each test twice. Results from the first warm-up round are ignored. */ - for (i = 0; i < 2; i++) { - uint64_t tot_cycles = 0; - - for (j = 0, k = 1; j < gbl_args->num_bench; k++) { - int ret; - uint64_t c1, c2; - const char *desc; - - /* Run selected test indefinitely */ - if (args->appl.bench_idx && - (j + 1) != args->appl.bench_idx) { - j++; - continue; - } else if (args->appl.bench_idx && - (j + 1) == args->appl.bench_idx) { - run_indef(args, j); - return 0; - } - - desc = args->bench[j].desc != NULL ? 
- args->bench[j].desc : - args->bench[j].name; - - /* Skip unsupported tests */ - if (args->bench[j].cond != NULL && !args->bench[j].cond()) { - j++; - k = 1; - if (i > 0) - printf("[%02d] odp_%-26s: n/a\n", j, desc); - continue; - } - - if (args->bench[j].init != NULL) - args->bench[j].init(); - - c1 = odp_cpu_cycles(); - ret = args->bench[j].run(); - c2 = odp_cpu_cycles(); - - if (args->bench[j].term != NULL) - args->bench[j].term(); - - if (!ret) { - ODPH_ERR("Benchmark odp_%s failed\n", desc); - args->bench_failed = 1; - return -1; - } - - tot_cycles += odp_cpu_cycles_diff(c2, c1); - - if (k >= args->appl.test_cycles) { - double cycles; - - /** Each benchmark runs internally TEST_REPEAT_COUNT times. */ - cycles = ((double)tot_cycles) / - (args->appl.test_cycles * - TEST_REPEAT_COUNT); - - /* No print from warm-up round */ - if (i > 0) - printf("[%02d] odp_%-26s: %8.1f\n", j + 1, desc, cycles); - - j++; - k = 1; - tot_cycles = 0; - } - } - } - printf("\n"); - - return 0; + odp_atomic_store_u32(&gbl_args->suite.exit_worker, 1); } static void allocate_test_buffers(odp_buffer_t buf[], int num) @@ -612,9 +463,9 @@ static void usage(char *progname) " -b, --burst Test burst size.\n" " -c, --cache_size Pool cache size.\n" " -i, --index Benchmark index to run indefinitely.\n" - " -t, --test_cycles Run each test 'num' times (default %d).\n" + " -r, --rounds Run each test case 'num' times (default %u).\n" " -h, --help Display help and exit.\n\n" - "\n", NO_PATH(progname), NO_PATH(progname), TEST_CYCLES); + "\n", NO_PATH(progname), NO_PATH(progname), TEST_ROUNDS); } /** @@ -632,17 +483,17 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) {"burst", required_argument, NULL, 'b'}, {"cache_size", required_argument, NULL, 'c'}, {"index", required_argument, NULL, 'i'}, - {"test_cycles", required_argument, NULL, 't'}, + {"rounds", required_argument, NULL, 'r'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = 
"c:b:i:t:h"; + static const char *shortopts = "c:b:i:r:h"; appl_args->bench_idx = 0; /* Run all benchmarks */ appl_args->burst_size = TEST_DEF_BURST; appl_args->cache_size = -1; - appl_args->test_cycles = TEST_CYCLES; + appl_args->rounds = TEST_ROUNDS; while (1) { opt = getopt_long(argc, argv, shortopts, longopts, &long_index); @@ -664,8 +515,8 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) case 'i': appl_args->bench_idx = atoi(optarg); break; - case 't': - appl_args->test_cycles = atoi(optarg); + case 'r': + appl_args->rounds = atoi(optarg); break; default: break; @@ -678,8 +529,8 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) exit(EXIT_FAILURE); } - if (appl_args->test_cycles < 1) { - printf("Invalid test cycle repeat count: %d\n", appl_args->test_cycles); + if (appl_args->rounds < 1 || appl_args->rounds > INT32_MAX) { /* also rejects negatives */ + printf("Invalid number test rounds: %d\n", (int)appl_args->rounds); exit(EXIT_FAILURE); } @@ -704,7 +555,7 @@ static void print_info(void) printf("Pool cache size: default\n"); else printf("Pool cache size: %d\n", gbl_args->appl.cache_size); - printf("Test cycles: %d\n", gbl_args->appl.test_cycles); + printf("Test rounds: %u\n", gbl_args->appl.rounds); printf("\n"); } @@ -793,14 +644,17 @@ int main(int argc, char *argv[]) } memset(gbl_args, 0, sizeof(args_t)); - odp_atomic_init_u32(&gbl_args->exit_thread, 0); - - gbl_args->bench = test_suite; - gbl_args->num_bench = sizeof(test_suite) / sizeof(test_suite[0]); /* Parse and store the application arguments */ parse_args(argc, argv, &gbl_args->appl); + bench_suite_init(&gbl_args->suite); + gbl_args->suite.bench = test_suite; + gbl_args->suite.num_bench = sizeof(test_suite) / sizeof(test_suite[0]); + gbl_args->suite.indef_idx = gbl_args->appl.bench_idx; + gbl_args->suite.rounds = gbl_args->appl.rounds; + gbl_args->suite.repeat_count = TEST_REPEAT_COUNT; + /* Get default worker cpumask */ if (odp_cpumask_default_worker(&default_mask, 1) != 1) { ODPH_ERR("Error: unable to allocate worker 
thread\n"); @@ -886,15 +740,15 @@ int main(int argc, char *argv[]) thr_common.share_param = 1; odph_thread_param_init(&thr_param); - thr_param.start = run_benchmarks; - thr_param.arg = gbl_args; + thr_param.start = bench_run; + thr_param.arg = &gbl_args->suite; thr_param.thr_type = ODP_THREAD_WORKER; odph_thread_create(&worker_thread, &thr_common, &thr_param, 1); odph_thread_join(&worker_thread, 1); - ret = gbl_args->bench_failed; + ret = gbl_args->suite.retval; if (odp_pool_destroy(gbl_args->pool)) { ODPH_ERR("Error: pool destroy\n"); diff --git a/test/performance/odp_bench_misc.c b/test/performance/odp_bench_misc.c index 3da4a66161..925bfc193d 100644 --- a/test/performance/odp_bench_misc.c +++ b/test/performance/odp_bench_misc.c @@ -11,6 +11,8 @@ #include #include +#include "bench_common.h" + #include #include #include @@ -23,8 +25,8 @@ /* Default number of rounds per test case */ #define ROUNDS 1000u -#define BENCH_INFO(run, init, max, name) \ - {#run, run, init, max, name} +#define BENCH_INFO(run_fn, init_fn, max, alt_name) \ + {.name = #run_fn, .run = run_fn, .init = init_fn, .max_rounds = max, .desc = alt_name} typedef struct { /* Measure time vs CPU cycles */ @@ -38,43 +40,12 @@ typedef struct { } appl_args_t; -/* Initialize benchmark resources */ -typedef void (*bench_init_fn_t)(void); - -/* Run benchmark, returns >0 on success */ -typedef int (*bench_run_fn_t)(void); - -/* Benchmark data */ -typedef struct { - /* Default test name */ - const char *name; - - /* Test function to run */ - bench_run_fn_t run; - - /* Test init function */ - bench_init_fn_t init; - - /* Test specific limit for rounds (tuning for slow implementation) */ - uint32_t max_rounds; - - /* Override default test name */ - const char *desc; - -} bench_info_t; - /* Global data */ typedef struct { appl_args_t appl; - /* Benchmark functions */ - bench_info_t *bench; - - /* Number of benchmark functions */ - int num_bench; - - /* Break worker loop if set to 1 */ - odp_atomic_u32_t 
exit_thread; + /* Common benchmark suite data */ + bench_suite_t suite; /* Test case input / output data */ odp_time_t t1[REPEAT_COUNT]; @@ -87,12 +58,6 @@ typedef struct { uint16_t c1[REPEAT_COUNT]; uint16_t c2[REPEAT_COUNT]; - /* Dummy result */ - uint64_t dummy; - - /* Benchmark run failed */ - int bench_failed; - /* CPU mask as string */ char cpumask_str[ODP_CPUMASK_STR_SIZE]; @@ -104,7 +69,7 @@ static void sig_handler(int signo ODP_UNUSED) { if (gbl_args == NULL) return; - odp_atomic_store_u32(&gbl_args->exit_thread, 1); + odp_atomic_store_u32(&gbl_args->suite.exit_worker, 1); } static int setup_sig_handler(void) @@ -125,118 +90,6 @@ static int setup_sig_handler(void) return 0; } -/* Run given benchmark indefinitely */ -static void run_indef(gbl_args_t *args, int idx) -{ - const char *desc; - const bench_info_t *bench = &args->bench[idx]; - - desc = bench->desc != NULL ? bench->desc : bench->name; - - printf("Running odp_%s test indefinitely\n", desc); - - while (!odp_atomic_load_u32(&gbl_args->exit_thread)) { - int ret; - - if (bench->init != NULL) - bench->init(); - - ret = bench->run(); - - if (!ret) - ODPH_ABORT("Benchmark %s failed\n", desc); - } -} - -static int run_benchmarks(void *arg) -{ - int i, j; - uint64_t c1, c2; - odp_time_t t1, t2; - gbl_args_t *args = arg; - const int meas_time = args->appl.time; - - printf("\nAverage %s per function call\n", meas_time ? "time (nsec)" : "CPU cycles"); - printf("-------------------------------------------------\n"); - - /* Run each test twice. Results from the first warm-up round are ignored. 
*/ - for (i = 0; i < 2; i++) { - uint64_t total = 0; - uint32_t round = 1; - - for (j = 0; j < gbl_args->num_bench; round++) { - int ret; - const char *desc; - const bench_info_t *bench = &args->bench[j]; - uint32_t max_rounds = args->appl.rounds; - - if (bench->max_rounds && max_rounds > bench->max_rounds) - max_rounds = bench->max_rounds; - - /* Run selected test indefinitely */ - if (args->appl.bench_idx) { - if ((j + 1) != args->appl.bench_idx) { - j++; - continue; - } - - run_indef(args, j); - return 0; - } - - desc = bench->desc != NULL ? bench->desc : bench->name; - - if (bench->init != NULL) - bench->init(); - - if (meas_time) - t1 = odp_time_local(); - else - c1 = odp_cpu_cycles(); - - ret = bench->run(); - - if (meas_time) - t2 = odp_time_local(); - else - c2 = odp_cpu_cycles(); - - if (!ret) { - ODPH_ERR("Benchmark odp_%s failed\n", desc); - args->bench_failed = -1; - return -1; - } - - if (meas_time) - total += odp_time_diff_ns(t2, t1); - else - total += odp_cpu_cycles_diff(c2, c1); - - if (round >= max_rounds) { - double result; - - /* Each benchmark runs internally REPEAT_COUNT times. 
*/ - result = ((double)total) / (max_rounds * REPEAT_COUNT); - - /* No print from warm-up round */ - if (i > 0) - printf("[%02d] odp_%-26s: %12.2f\n", j + 1, desc, result); - - j++; - total = 0; - round = 1; - } - } - } - - /* Print dummy result to prevent compiler to optimize it away*/ - printf("\n(dummy result: 0x%" PRIx64 ")\n", args->dummy); - - printf("\n"); - - return 0; -} - static void init_time_global(void) { int i; @@ -395,7 +248,7 @@ static int time_diff_ns(void) for (i = 0; i < REPEAT_COUNT; i++) res += odp_time_diff_ns(t2[i], t1[i]); - gbl_args->dummy += res; + gbl_args->suite.dummy += res; return i; } @@ -422,7 +275,7 @@ static int time_to_ns(void) for (i = 0; i < REPEAT_COUNT; i++) res += odp_time_to_ns(t1[i]); - gbl_args->dummy += res; + gbl_args->suite.dummy += res; return i; } @@ -461,7 +314,7 @@ static int time_cmp(void) for (i = 0; i < REPEAT_COUNT; i++) res += odp_time_cmp(t1[i], t2[i]); - gbl_args->dummy += res; + gbl_args->suite.dummy += res; return i; } @@ -577,7 +430,7 @@ static int cpu_cycles_diff(void) for (i = 0; i < REPEAT_COUNT; i++) res += odp_cpu_cycles_diff(a2[i], a1[i]); - gbl_args->dummy += res; + gbl_args->suite.dummy += res; return i; } @@ -947,7 +800,8 @@ static int parse_args(int argc, char *argv[]) return -1; } - if (appl_args->bench_idx < 0 || appl_args->bench_idx > gbl_args->num_bench) { + if (appl_args->bench_idx < 0 || + appl_args->bench_idx > (int)(sizeof(test_suite) / sizeof(test_suite[0]))) { ODPH_ERR("Bad bench index %i\n", appl_args->bench_idx); return -1; } @@ -1026,10 +880,6 @@ int main(int argc, char *argv[]) } memset(gbl_args, 0, sizeof(gbl_args_t)); - odp_atomic_init_u32(&gbl_args->exit_thread, 0); - - gbl_args->bench = test_suite; - gbl_args->num_bench = sizeof(test_suite) / sizeof(test_suite[0]); for (i = 0; i < REPEAT_COUNT; i++) { gbl_args->t1[i] = ODP_TIME_NULL; @@ -1048,6 +898,14 @@ int main(int argc, char *argv[]) if (ret) goto exit; + bench_suite_init(&gbl_args->suite); + gbl_args->suite.bench = 
test_suite; + gbl_args->suite.num_bench = sizeof(test_suite) / sizeof(test_suite[0]); + gbl_args->suite.measure_time = !!gbl_args->appl.time; + gbl_args->suite.indef_idx = gbl_args->appl.bench_idx; + gbl_args->suite.rounds = gbl_args->appl.rounds; + gbl_args->suite.repeat_count = REPEAT_COUNT; + /* Get default worker cpumask */ if (odp_cpumask_default_worker(&default_mask, 1) != 1) { ODPH_ERR("Unable to allocate worker thread\n"); @@ -1074,15 +932,15 @@ int main(int argc, char *argv[]) thr_common.share_param = 1; odph_thread_param_init(&thr_param); - thr_param.start = run_benchmarks; - thr_param.arg = gbl_args; + thr_param.start = bench_run; + thr_param.arg = &gbl_args->suite; thr_param.thr_type = ODP_THREAD_WORKER; odph_thread_create(&worker_thread, &thr_common, &thr_param, 1); odph_thread_join(&worker_thread, 1); - ret = gbl_args->bench_failed; + ret = gbl_args->suite.retval; exit: if (odp_shm_free(shm)) { diff --git a/test/performance/odp_bench_packet.c b/test/performance/odp_bench_packet.c index 8da2d736a0..c36202d560 100644 --- a/test/performance/odp_bench_packet.c +++ b/test/performance/odp_bench_packet.c @@ -1,5 +1,5 @@ /* Copyright (c) 2017-2018, Linaro Limited - * Copyright (c) 2022, Nokia + * Copyright (c) 2022-2023, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause @@ -24,6 +24,8 @@ #include #include +#include "bench_common.h" + /** Minimum number of packet data bytes in the first segment */ #define PKT_POOL_SEG_LEN 128 @@ -36,11 +38,11 @@ /** Maximum test packet size */ #define TEST_MAX_PKT_SIZE 2048 -/** Number of test runs per individual benchmark */ +/** Number of API function calls per test case */ #define TEST_REPEAT_COUNT 1000 -/** Number of times to run tests for each packet size */ -#define TEST_SIZE_RUN_COUNT 10 +/** Number of rounds per test case */ +#define TEST_ROUNDS 10u /** Maximum burst size for *_multi operations */ #define TEST_MAX_BURST 64 @@ -66,17 +68,14 @@ #define NO_PATH(file_name) (strrchr((file_name), '/') ? 
\ strrchr((file_name), '/') + 1 : (file_name)) -#define BENCH_INFO(run, init, term, name) \ - {#run, run, init, term, name} +#define BENCH_INFO(run_fn, init_fn, term_fn, alt_name) \ + {.name = #run_fn, .run = run_fn, .init = init_fn, .term = term_fn, .desc = alt_name} ODP_STATIC_ASSERT((TEST_ALIGN_OFFSET + TEST_ALIGN_LEN) <= TEST_MIN_PKT_SIZE, "Invalid_alignment"); -/** Warm up round packet size */ -#define WARM_UP TEST_MIN_PKT_SIZE - /** Test packet sizes */ -const uint32_t test_packet_len[] = {WARM_UP, TEST_MIN_PKT_SIZE, 128, 256, 512, +const uint32_t test_packet_len[] = {TEST_MIN_PKT_SIZE, 128, 256, 512, 1024, 1518, TEST_MAX_PKT_SIZE}; /** @@ -86,50 +85,19 @@ typedef struct { int bench_idx; /** Benchmark index to run indefinitely */ int burst_size; /** Burst size for *_multi operations */ int cache_size; /** Pool cache size */ + uint32_t rounds; /** Rounds per test case */ } appl_args_t; -/** - * Initialize benchmark resources - */ -typedef void (*bench_init_fn_t)(void); - -/** - * Run benchmark - * - * @retval >0 on success - * */ -typedef int (*bench_run_fn_t)(void); - -/** - * Release benchmark resources - */ -typedef void (*bench_term_fn_t)(void); - -/** - * Benchmark data - */ -typedef struct { - const char *name; - bench_run_fn_t run; - bench_init_fn_t init; - bench_term_fn_t term; - const char *desc; -} bench_info_t; - /** * Grouping of all global data */ typedef struct { /** Application (parsed) arguments */ appl_args_t appl; + /** Common benchmark suite data */ + bench_suite_t suite; /** Packet pool */ odp_pool_t pool; - /** Benchmark functions */ - bench_info_t *bench; - /** Number of benchmark functions */ - int num_bench; - /** Break worker loop if set to 1 */ - odp_atomic_u32_t exit_thread; struct { /** Test packet length */ uint32_t len; @@ -160,8 +128,6 @@ typedef struct { odp_time_t ts_tbl[TEST_REPEAT_COUNT]; /** Array for storing test data */ uint8_t data_tbl[TEST_REPEAT_COUNT][TEST_MAX_PKT_SIZE]; - /** Benchmark run failed */ - uint8_t 
bench_failed; } args_t; /** Global pointer to args */ @@ -171,35 +137,7 @@ static void sig_handler(int signo ODP_UNUSED) { if (gbl_args == NULL) return; - odp_atomic_store_u32(&gbl_args->exit_thread, 1); -} - -/** - * Run given benchmark indefinitely - */ -static void run_indef(args_t *args, int idx) -{ - const char *desc; - - desc = args->bench[idx].desc != NULL ? - args->bench[idx].desc : args->bench[idx].name; - - printf("Running odp_%s test indefinitely\n", desc); - - while (!odp_atomic_load_u32(&gbl_args->exit_thread)) { - int ret; - - if (args->bench[idx].init != NULL) - args->bench[idx].init(); - - ret = args->bench[idx].run(); - - if (args->bench[idx].term != NULL) - args->bench[idx].term(); - - if (!ret) - ODPH_ABORT("Benchmark %s failed\n", desc); - } + odp_atomic_store_u32(&gbl_args->suite.exit_worker, 1); } /** @@ -207,96 +145,38 @@ static void run_indef(args_t *args, int idx) */ static int run_benchmarks(void *arg) { - int i, j, k; + int i; args_t *args = arg; + bench_suite_t *suite = &args->suite; int num_sizes = sizeof(test_packet_len) / sizeof(test_packet_len[0]); - double results[gbl_args->num_bench][num_sizes]; + double results[num_sizes][suite->num_bench]; memset(results, 0, sizeof(results)); - printf("\nRunning benchmarks (cycles per call)\n" - "------------------------------------\n"); - for (i = 0; i < num_sizes; i++) { - uint64_t tot_cycles = 0; - - printf("\nPacket length: %6d bytes\n" - "---------------------------\n", test_packet_len[i]); + printf("Packet length: %6d bytes", test_packet_len[i]); gbl_args->pkt.len = test_packet_len[i]; - for (j = 0, k = 1; j < gbl_args->num_bench; k++) { - int ret; - uint64_t c1, c2; - const char *desc; - - if (args->appl.bench_idx && - (j + 1) != args->appl.bench_idx) { - j++; - continue; - } else if (args->appl.bench_idx && - (j + 1) == args->appl.bench_idx) { - run_indef(args, j); - return 0; - } - - desc = args->bench[j].desc != NULL ? 
- args->bench[j].desc : - args->bench[j].name; - - if (args->bench[j].init != NULL) - args->bench[j].init(); - - c1 = odp_cpu_cycles(); - ret = args->bench[j].run(); - c2 = odp_cpu_cycles(); - - if (args->bench[j].term != NULL) - args->bench[j].term(); - - if (!ret) { - ODPH_ERR("Benchmark %s failed\n", desc); - args->bench_failed = 1; - return -1; - } - - tot_cycles += odp_cpu_cycles_diff(c2, c1); - - if (k >= TEST_SIZE_RUN_COUNT) { - double cycles; - - /** Each benchmark runs internally - * TEST_REPEAT_COUNT times. */ - cycles = ((double)tot_cycles) / - (TEST_SIZE_RUN_COUNT * - TEST_REPEAT_COUNT); - results[j][i] = cycles; - - printf("odp_%-26s: %8.1f\n", desc, cycles); - - j++; - k = 1; - tot_cycles = 0; - } - } - } - printf("\n%-30s", "Benchmark / packet_size [B]"); - for (i = 0; i < num_sizes; i++) { - if (i == 0) - printf(" WARM UP "); - else - printf("%8.1d ", test_packet_len[i]); + suite->result = results[i]; + + bench_run(suite); } + + printf("\n%-35s", "Benchmark / packet_size [B]"); + for (i = 0; i < num_sizes; i++) + printf("%8.1d ", test_packet_len[i]); + printf("\n---------------------------------"); for (i = 0; i < num_sizes; i++) printf("----------"); - for (i = 0; i < gbl_args->num_bench; i++) { - printf("\n[%02d] odp_%-26s", i + 1, args->bench[i].desc != NULL ? - args->bench[i].desc : args->bench[i].name); + for (i = 0; i < suite->num_bench; i++) { + printf("\n[%02d] odp_%-26s", i + 1, suite->bench[i].desc != NULL ? 
+ suite->bench[i].desc : suite->bench[i].name); - for (j = 0; j < num_sizes; j++) - printf("%8.1f ", results[i][j]); + for (int j = 0; j < num_sizes; j++) + printf("%8.1f ", results[j][i]); } printf("\n\n"); return 0; @@ -1480,8 +1360,9 @@ static void usage(char *progname) " -b, --burst Test packet burst size.\n" " -c, --cache_size Pool cache size.\n" " -i, --index Benchmark index to run indefinitely.\n" + " -r, --rounds Run each test case 'num' times (default %u).\n" " -h, --help Display help and exit.\n\n" - "\n", NO_PATH(progname), NO_PATH(progname)); + "\n", NO_PATH(progname), NO_PATH(progname), TEST_ROUNDS); } /** @@ -1498,16 +1379,18 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) static const struct option longopts[] = { {"burst", required_argument, NULL, 'b'}, {"cache_size", required_argument, NULL, 'c'}, - {"help", no_argument, NULL, 'h'}, {"index", required_argument, NULL, 'i'}, + {"rounds", required_argument, NULL, 'r'}, + {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - static const char *shortopts = "c:b:i:h"; + static const char *shortopts = "c:b:i:r:h"; appl_args->bench_idx = 0; /* Run all benchmarks */ appl_args->burst_size = TEST_DEF_BURST; appl_args->cache_size = -1; + appl_args->rounds = TEST_ROUNDS; while (1) { opt = getopt_long(argc, argv, shortopts, longopts, &long_index); @@ -1522,15 +1405,19 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) case 'b': appl_args->burst_size = atoi(optarg); break; + case 'i': + appl_args->bench_idx = atoi(optarg); + break; + case 'r': + appl_args->rounds = atoi(optarg); + break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; - case 'i': - appl_args->bench_idx = atoi(optarg); - break; default: - break; + usage(argv[0]); + exit(EXIT_FAILURE); } } @@ -1540,6 +1427,11 @@ static void parse_args(int argc, char *argv[], appl_args_t *appl_args) exit(EXIT_FAILURE); } + if (appl_args->rounds < 1 || appl_args->rounds > INT32_MAX) { /* also rejects negatives */ + printf("Invalid number test rounds: %d\n", 
(int)appl_args->rounds); + exit(EXIT_FAILURE); + } + optind = 1; /* Reset 'extern optind' from the getopt lib */ } @@ -1711,14 +1603,17 @@ int main(int argc, char *argv[]) } memset(gbl_args, 0, sizeof(args_t)); - odp_atomic_init_u32(&gbl_args->exit_thread, 0); - - gbl_args->bench = test_suite; - gbl_args->num_bench = sizeof(test_suite) / sizeof(test_suite[0]); /* Parse and store the application arguments */ parse_args(argc, argv, &gbl_args->appl); + bench_suite_init(&gbl_args->suite); + gbl_args->suite.bench = test_suite; + gbl_args->suite.num_bench = sizeof(test_suite) / sizeof(test_suite[0]); + gbl_args->suite.indef_idx = gbl_args->appl.bench_idx; + gbl_args->suite.rounds = gbl_args->appl.rounds; + gbl_args->suite.repeat_count = TEST_REPEAT_COUNT; + /* Print both system and application information */ print_info(NO_PATH(argv[0]), &gbl_args->appl); @@ -1788,6 +1683,7 @@ int main(int argc, char *argv[]) printf("CPU mask: %s\n", cpumaskstr); printf("Burst size: %d\n", gbl_args->appl.burst_size); printf("Bench repeat: %d\n", TEST_REPEAT_COUNT); + printf("Test rounds: %u\n", gbl_args->appl.rounds); if (gbl_args->appl.cache_size < 0) printf("Pool cache size: default\n"); else @@ -1821,7 +1717,7 @@ int main(int argc, char *argv[]) odph_thread_join(&worker_thread, 1); - ret = gbl_args->bench_failed; + ret = gbl_args->suite.retval; if (odp_pool_destroy(gbl_args->pool)) { ODPH_ERR("Error: pool destroy\n"); diff --git a/test/performance/odp_bench_timer.c b/test/performance/odp_bench_timer.c index 918d19e5d7..f6762175eb 100644 --- a/test/performance/odp_bench_timer.c +++ b/test/performance/odp_bench_timer.c @@ -11,6 +11,8 @@ #include #include +#include "bench_common.h" + #include #include #include @@ -29,27 +31,8 @@ /** Timer duration in nsec */ #define TIMER_NSEC 50000000 -#define BENCH_INFO(run, max, name) \ - {#run, run, max, name} - -/* Run benchmark, returns >0 on success */ -typedef int (*bench_run_fn_t)(void); - -/* Benchmark data */ -typedef struct { - /* Default test name 
*/ - const char *name; - - /* Test function to run */ - bench_run_fn_t run; - - /* Test specific limit for rounds (tuning for slow implementation) */ - uint32_t max_rounds; - - /* Override default test name */ - const char *desc; - -} bench_info_t; +#define BENCH_INFO(run_fn, max, alt_name) \ + {.name = #run_fn, .run = run_fn, .max_rounds = max, .desc = alt_name} typedef struct { /* Command line options */ @@ -68,6 +51,9 @@ typedef struct { } opt; + /* Common benchmark suite data */ + bench_suite_t suite; + odp_timer_pool_t timer_pool; odp_timer_t timer; odp_queue_t queue; @@ -80,27 +66,12 @@ typedef struct { double tick_hz; int plain_queue; - /* Benchmark functions */ - bench_info_t *bench; - - /* Number of benchmark functions */ - int num_bench; - - /* Break worker loop if set to 1 */ - odp_atomic_u32_t exit_thread; - /* Test case input / output data */ uint64_t a1[REPEAT_COUNT]; odp_event_t ev[REPEAT_COUNT]; odp_timeout_t tmo[REPEAT_COUNT]; odp_timer_t tim[REPEAT_COUNT]; - /* Dummy result */ - uint64_t dummy; - - /* Benchmark run failed */ - int bench_failed; - /* CPU mask as string */ char cpumask_str[ODP_CPUMASK_STR_SIZE]; @@ -112,7 +83,7 @@ static void sig_handler(int signo ODP_UNUSED) { if (gbl_args == NULL) return; - odp_atomic_store_u32(&gbl_args->exit_thread, 1); + odp_atomic_store_u32(&gbl_args->suite.exit_worker, 1); } static int setup_sig_handler(void) @@ -133,113 +104,6 @@ static int setup_sig_handler(void) return 0; } -/* Run given benchmark indefinitely */ -static void run_indef(gbl_args_t *args, int idx) -{ - const char *desc; - const bench_info_t *bench = &args->bench[idx]; - - desc = bench->desc != NULL ? 
bench->desc : bench->name; - - printf("Running odp_%s test indefinitely\n", desc); - - while (!odp_atomic_load_u32(&gbl_args->exit_thread)) { - int ret; - - ret = bench->run(); - - if (!ret) - ODPH_ABORT("Benchmark %s failed\n", desc); - } -} - -static int run_benchmarks(void *arg) -{ - int i, j; - uint64_t c1, c2; - odp_time_t t1, t2; - gbl_args_t *args = arg; - const int meas_time = args->opt.time; - - printf("\nAverage %s per function call\n", meas_time ? "time (nsec)" : "CPU cycles"); - printf("-------------------------------------------------\n"); - - /* Run each test twice. Results from the first warm-up round are ignored. */ - for (i = 0; i < 2; i++) { - uint64_t total = 0; - uint32_t round = 1; - - for (j = 0; j < gbl_args->num_bench; round++) { - int ret; - const char *desc; - const bench_info_t *bench = &args->bench[j]; - uint32_t max_rounds = args->opt.rounds; - - if (bench->max_rounds && max_rounds > bench->max_rounds) - max_rounds = bench->max_rounds; - - /* Run selected test indefinitely */ - if (args->opt.bench_idx) { - if ((j + 1) != args->opt.bench_idx) { - j++; - continue; - } - - run_indef(args, j); - return 0; - } - - desc = bench->desc != NULL ? bench->desc : bench->name; - - if (meas_time) - t1 = odp_time_local(); - else - c1 = odp_cpu_cycles(); - - ret = bench->run(); - - if (meas_time) - t2 = odp_time_local(); - else - c2 = odp_cpu_cycles(); - - if (!ret) { - ODPH_ERR("Benchmark odp_%s failed\n", desc); - args->bench_failed = -1; - return -1; - } - - if (meas_time) - total += odp_time_diff_ns(t2, t1); - else - total += odp_cpu_cycles_diff(c2, c1); - - for (i = 0; i < REPEAT_COUNT; i++) - args->dummy += args->a1[i]; - - if (round >= max_rounds) { - double result; - - /* Each benchmark runs internally REPEAT_COUNT times. 
*/ - result = ((double)total) / (max_rounds * REPEAT_COUNT); - - /* No print from warm-up round */ - if (i > 0) - printf("[%02d] odp_%-26s: %12.2f\n", j + 1, desc, result); - - j++; - total = 0; - round = 1; - } - } - } - - /* Print dummy result to prevent compiler to optimize it away*/ - printf("\n(dummy result: 0x%" PRIx64 ")\n\n", args->dummy); - - return 0; -} - static int timer_current_tick(void) { int i; @@ -287,7 +151,7 @@ static int timeout_to_event(void) for (i = 0; i < REPEAT_COUNT; i++) ev[i] = odp_timeout_to_event(timeout); - gbl_args->dummy += odp_event_to_u64(ev[0]); + gbl_args->suite.dummy += odp_event_to_u64(ev[0]); return i; } @@ -301,7 +165,7 @@ static int timeout_from_event(void) for (i = 0; i < REPEAT_COUNT; i++) tmo[i] = odp_timeout_from_event(ev); - gbl_args->dummy += odp_timeout_to_u64(tmo[0]); + gbl_args->suite.dummy += odp_timeout_to_u64(tmo[0]); return i; } @@ -327,7 +191,7 @@ static int timeout_timer(void) for (i = 0; i < REPEAT_COUNT; i++) tim[i] = odp_timeout_timer(timeout); - gbl_args->dummy += odp_timer_to_u64(tim[0]); + gbl_args->suite.dummy += odp_timer_to_u64(tim[0]); return i; } @@ -491,7 +355,8 @@ static int parse_args(int argc, char *argv[]) return -1; } - if (gbl_args->opt.bench_idx < 0 || gbl_args->opt.bench_idx > gbl_args->num_bench) { + if (gbl_args->opt.bench_idx < 0 || + gbl_args->opt.bench_idx > (int)(sizeof(test_suite) / sizeof(test_suite[0]))) { ODPH_ERR("Bad bench index %i\n", gbl_args->opt.bench_idx); return -1; } @@ -760,16 +625,12 @@ int main(int argc, char *argv[]) } memset(gbl_args, 0, sizeof(gbl_args_t)); - odp_atomic_init_u32(&gbl_args->exit_thread, 0); gbl_args->timer_pool = ODP_TIMER_POOL_INVALID; gbl_args->timer = ODP_TIMER_INVALID; gbl_args->queue = ODP_QUEUE_INVALID; gbl_args->pool = ODP_POOL_INVALID; gbl_args->timeout = ODP_TIMEOUT_INVALID; - gbl_args->bench = test_suite; - gbl_args->num_bench = sizeof(test_suite) / sizeof(test_suite[0]); - for (i = 0; i < REPEAT_COUNT; i++) { gbl_args->a1[i] = i; 
gbl_args->ev[i] = ODP_EVENT_INVALID; @@ -782,6 +643,14 @@ int main(int argc, char *argv[]) if (ret) goto exit; + bench_suite_init(&gbl_args->suite); + gbl_args->suite.bench = test_suite; + gbl_args->suite.num_bench = sizeof(test_suite) / sizeof(test_suite[0]); + gbl_args->suite.measure_time = !!gbl_args->opt.time; + gbl_args->suite.indef_idx = gbl_args->opt.bench_idx; + gbl_args->suite.rounds = gbl_args->opt.rounds; + gbl_args->suite.repeat_count = REPEAT_COUNT; + /* Get default worker cpumask */ if (odp_cpumask_default_worker(&default_mask, 1) != 1) { ODPH_ERR("Unable to allocate worker thread\n"); @@ -819,15 +688,15 @@ int main(int argc, char *argv[]) thr_common.share_param = 1; odph_thread_param_init(&thr_param); - thr_param.start = run_benchmarks; - thr_param.arg = gbl_args; + thr_param.start = bench_run; + thr_param.arg = &gbl_args->suite; thr_param.thr_type = ODP_THREAD_WORKER; odph_thread_create(&worker_thread, &thr_common, &thr_param, 1); odph_thread_join(&worker_thread, 1); - ret = gbl_args->bench_failed; + ret = gbl_args->suite.retval; exit: if (gbl_args->timeout != ODP_TIMEOUT_INVALID)