Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PATCH v3] linux-gen: cpu: optimize aarch64 odp_cpu_cycles() implementation #1879

Merged
merged 2 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion platform/linux-generic/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ noinst_HEADERS += arch/arm/odp_atomic.h \
endif
if ARCH_IS_AARCH64
__LIB__libodp_linux_la_SOURCES += arch/aarch64/odp_atomic.c \
arch/default/odp_cpu_cycles.c \
arch/aarch64/odp_cpu_cycles.c \
arch/aarch64/cpu_flags.c \
arch/default/odp_hash_crc32.c \
arch/default/odp_random.c \
Expand Down
34 changes: 31 additions & 3 deletions platform/linux-generic/arch/aarch64/odp/api/abi/cpu_inlines.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Copyright (c) 2016-2018, Linaro Limited
* Copyright (c) 2021, Nokia
* Copyright (c) 2021-2023, Nokia
* All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
Expand All @@ -12,6 +12,22 @@
extern "C" {
#endif

#include <odp/api/abi/time_cpu.h>

#include <stdint.h>

/* CPU frequency is shifted to decrease integer division error */
#define _ODP_CPU_FREQ_SHIFT 16

/*
 * Global state backing the inline CPU cycle count functions. Filled in once
 * by _odp_cpu_cycles_init_global() and only read by the inline accessors.
 */
typedef struct _odp_cpu_cycles_global_t {
	/* Cycle count resolution: (res_shifted >> _ODP_CPU_FREQ_SHIFT) when the
	 * CPU frequency is higher than the time counter frequency, otherwise 1 */
	uint64_t res;
	/* (CPU Hz << _ODP_CPU_FREQ_SHIFT) / CPU time counter Hz, i.e. the
	 * fixed-point multiplier applied to the time counter value */
	uint64_t res_shifted;
	/* Value reported by _odp_cpu_cycles_max() */
	uint64_t max;
} _odp_cpu_cycles_global_t;

extern _odp_cpu_cycles_global_t _odp_cpu_cycles_glob;

static inline void _odp_cpu_pause(void)
{
/* YIELD hints the CPU to switch to another thread if possible
Expand All @@ -22,8 +38,20 @@ static inline void _odp_cpu_pause(void)
__asm volatile("isb" ::: "memory");
}

/* Use generic implementations for the rest of the functions */
#include <odp/api/abi/cpu_generic.h>
/*
 * Current CPU cycle count, derived from the CPU time counter.
 *
 * The counter value is multiplied by the pre-computed fixed-point factor
 * (res_shifted) and the product is shifted back down by
 * _ODP_CPU_FREQ_SHIFT bits. The multiplication is done in 64 bits, so the
 * product wraps around on overflow.
 */
static inline uint64_t _odp_cpu_cycles(void)
{
	const uint64_t counter = _odp_time_cpu_global();
	const uint64_t scaled = counter * _odp_cpu_cycles_glob.res_shifted;

	return scaled >> _ODP_CPU_FREQ_SHIFT;
}

static inline uint64_t _odp_cpu_cycles_resolution(void)
{
return _odp_cpu_cycles_glob.res;
}

static inline uint64_t _odp_cpu_cycles_max(void)
{
return _odp_cpu_cycles_glob.max;
}

#ifdef __cplusplus
}
Expand Down
48 changes: 48 additions & 0 deletions platform/linux-generic/arch/aarch64/odp_cpu_cycles.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2023 Nokia
*/

#include <odp/api/cpu.h>

#include <odp/api/abi/cpu_inlines.h>
#include <odp/api/abi/time_cpu.h>

#include <odp_debug_internal.h>
#include <odp_init_internal.h>

#include <string.h>

#include <odp/visibility_begin.h>

_odp_cpu_cycles_global_t _odp_cpu_cycles_glob;

#include <odp/visibility_end.h>

int _odp_cpu_cycles_init_global(void)
{
uint64_t cpu_hz, cpu_time_hz;

memset(&_odp_cpu_cycles_glob, 0, sizeof(_odp_cpu_cycles_global_t));

cpu_time_hz = _odp_time_cpu_global_freq();
if (cpu_time_hz == 0) {
_ODP_ERR("CPU time counter frequency not available\n");
return -1;
}

cpu_hz = odp_cpu_hz_max_id(0);
if (cpu_hz == 0) {
_ODP_ERR("CPU frequency not available\n");
return -1;
}

_odp_cpu_cycles_glob.res_shifted = (cpu_hz << _ODP_CPU_FREQ_SHIFT) / cpu_time_hz;

_odp_cpu_cycles_glob.res = cpu_hz > cpu_time_hz ?
(_odp_cpu_cycles_glob.res_shifted >> _ODP_CPU_FREQ_SHIFT) : 1;

_odp_cpu_cycles_glob.max = (UINT64_MAX >> _ODP_CPU_FREQ_SHIFT) -
(UINT64_MAX % _odp_cpu_cycles_glob.res);

return 0;
}
4 changes: 2 additions & 2 deletions platform/linux-generic/include/odp/api/plat/cpu_inlines.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* Copyright (c) 2018, Linaro Limited
* Copyright (c) 2021, Nokia
* Copyright (c) 2021-2023, Nokia
* All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
Expand Down Expand Up @@ -53,7 +53,7 @@ _ODP_INLINE uint64_t odp_cpu_cycles_diff(uint64_t c2, uint64_t c1)
if (odp_likely(c2 >= c1))
return c2 - c1;

return c2 + (odp_cpu_cycles_max() - c1) + 1;
return c2 + (odp_cpu_cycles_max() - c1) + _odp_cpu_cycles_resolution();
}

/** @endcond */
Expand Down
17 changes: 8 additions & 9 deletions platform/linux-generic/odp_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ enum init_stage {
NO_INIT = 0, /* No init stages completed */
LIBCONFIG_INIT,
CPUMASK_INIT,
CPU_CYCLES_INIT,
SYSINFO_INIT,
CPU_CYCLES_INIT,
TIME_INIT,
ISHM_INIT,
FDSERVER_INIT,
Expand Down Expand Up @@ -299,15 +299,14 @@ static int term_global(enum init_stage stage)
}
/* Fall through */

case CPU_CYCLES_INIT:
case SYSINFO_INIT:
if (_odp_system_info_term()) {
_ODP_ERR("ODP system info term failed.\n");
rc = -1;
}
/* Fall through */

case CPU_CYCLES_INIT:
/* Fall through */
case CPUMASK_INIT:
if (_odp_cpumask_term_global()) {
_ODP_ERR("ODP cpumask term failed.\n");
Expand Down Expand Up @@ -366,18 +365,18 @@ int odp_init_global(odp_instance_t *instance,
}
stage = CPUMASK_INIT;

if (_odp_cpu_cycles_init_global()) {
_ODP_ERR("ODP cpu cycle init failed.\n");
goto init_failed;
}
stage = CPU_CYCLES_INIT;

if (_odp_system_info_init()) {
_ODP_ERR("ODP system_info init failed.\n");
goto init_failed;
}
stage = SYSINFO_INIT;

if (_odp_cpu_cycles_init_global()) {
_ODP_ERR("ODP cpu cycle init failed.\n");
goto init_failed;
}
stage = CPU_CYCLES_INIT;

if (_odp_time_init_global()) {
_ODP_ERR("ODP time init failed.\n");
goto init_failed;
Expand Down
40 changes: 39 additions & 1 deletion test/performance/odp_bench_misc.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (c) 2022, Nokia
/* Copyright (c) 2022-2023, Nokia
* All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
Expand Down Expand Up @@ -521,6 +521,18 @@ static int cpu_hz(void)
return i;
}

/*
 * Benchmark body: call odp_cpu_hz_id() REPEAT_COUNT times for the CPU id
 * reported by odp_cpu_id(), storing each result into the a1 array of the
 * global arguments. Returns the number of calls made.
 */
static int cpu_hz_id(void)
{
	const int cpu = odp_cpu_id();
	uint64_t *out = gbl_args->a1;
	int round = 0;

	while (round < REPEAT_COUNT) {
		out[round] = odp_cpu_hz_id(cpu);
		round++;
	}

	return round;
}

static int cpu_hz_max(void)
{
int i;
Expand All @@ -532,6 +544,18 @@ static int cpu_hz_max(void)
return i;
}

/*
 * Benchmark body: call odp_cpu_hz_max_id() REPEAT_COUNT times for the CPU
 * id reported by odp_cpu_id(), storing each result into the a1 array of the
 * global arguments. Returns the number of calls made.
 */
static int cpu_hz_max_id(void)
{
	const int cpu = odp_cpu_id();
	uint64_t *out = gbl_args->a1;
	int round = 0;

	while (round < REPEAT_COUNT) {
		out[round] = odp_cpu_hz_max_id(cpu);
		round++;
	}

	return round;
}

static int cpu_cycles(void)
{
int i;
Expand Down Expand Up @@ -569,6 +593,17 @@ static int cpu_cycles_max(void)
return i;
}

/*
 * Benchmark body: call odp_cpu_cycles_resolution() REPEAT_COUNT times,
 * storing each result into the a1 array of the global arguments. Returns
 * the number of calls made.
 */
static int cpu_cycles_resolution(void)
{
	uint64_t *out = gbl_args->a1;
	int round = 0;

	while (round < REPEAT_COUNT) {
		out[round] = odp_cpu_cycles_resolution();
		round++;
	}

	return round;
}

static int cpu_pause(void)
{
int i;
Expand Down Expand Up @@ -819,10 +854,13 @@ bench_info_t test_suite[] = {
BENCH_INFO(cpu_id, NULL, 0, NULL),
BENCH_INFO(cpu_count, NULL, 0, NULL),
BENCH_INFO(cpu_hz, NULL, 1, NULL),
BENCH_INFO(cpu_hz_id, NULL, 1, NULL),
BENCH_INFO(cpu_hz_max, NULL, 0, NULL),
BENCH_INFO(cpu_hz_max_id, NULL, 0, NULL),
BENCH_INFO(cpu_cycles, NULL, 0, NULL),
BENCH_INFO(cpu_cycles_diff, init_cpu_cycles, 0, NULL),
BENCH_INFO(cpu_cycles_max, NULL, 0, NULL),
BENCH_INFO(cpu_cycles_resolution, NULL, 0, NULL),
BENCH_INFO(cpu_pause, NULL, 0, NULL),
BENCH_INFO(thread_id, NULL, 0, NULL),
BENCH_INFO(thread_count, NULL, 0, NULL),
Expand Down