From cb26be817570a5d9e1ace3f2498d96fce5f89bea Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Thu, 9 May 2024 18:13:16 -0500 Subject: [PATCH] Pre-generate cacheline size --- CMakeLists.txt | 5 + cmake/CacheLineSize.cmake | 63 ++++ cmake/HPX_AddModule.cmake | 12 + .../hpx/concurrency/cache_line_data.hpp | 299 ++++++++---------- .../cmake/templates/cache_line_size.hpp.in | 17 + libs/core/config/include/hpx/config.hpp | 1 + 6 files changed, 233 insertions(+), 164 deletions(-) create mode 100644 cmake/CacheLineSize.cmake create mode 100644 libs/core/config/cmake/templates/cache_line_size.hpp.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 19bcb7181786..61b422d4335f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1690,9 +1690,13 @@ hpx_option( include(HPX_SetupCUDA) include(HPX_PerformCxxFeatureTests) hpx_perform_cxx_feature_tests() + include(TargetArch) target_architecture(__target_arch) +include(CacheLineSize) +cache_line_size(__cache_line_size) + # ############################################################################## # Set configuration option to use Boost.Context or not. This depends on the # platform. @@ -2014,6 +2018,7 @@ if(HPX_WITH_COMPILER_WARNINGS_AS_ERRORS) endif() # Diagnostics +hpx_info("Cacheline size detected: ${__cache_line_size}") if(MSVC) # Display full paths in diagnostics hpx_add_compile_flag(-FC) diff --git a/cmake/CacheLineSize.cmake b/cmake/CacheLineSize.cmake new file mode 100644 index 000000000000..1f63ed9cc740 --- /dev/null +++ b/cmake/CacheLineSize.cmake @@ -0,0 +1,63 @@ +# Copyright (c) 2024 Hartmut Kaiser +# +# SPDX-License-Identifier: BSL-1.0 +# Distributed under the Boost Software License, Version 1.0. 
(See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +set(cache_line_size_detect_cpp_code + " + #include <iostream> + #include <new> + int main() + { +#if defined(HPX_HAVE_CXX17_HARDWARE_DESTRUCTIVE_INTERFERENCE_SIZE) + std::cout << std::hardware_destructive_interference_size; +#else +#if defined(__s390__) || defined(__s390x__) + std::cout << 256; // assume 256 byte cache-line size +#elif defined(powerpc) || defined(__powerpc__) || defined(__ppc__) + std::cout << 128; // assume 128 byte cache-line size +#else + std::cout << 64; // assume 64 byte cache-line size +#endif +#endif + } +" +) + +function(cache_line_size output_var) + if(NOT HPX_INTERNAL_CACHE_LINE_SIZE_DETECT) + file(WRITE "${PROJECT_BINARY_DIR}/cache_line_size.cpp" + "${cache_line_size_detect_cpp_code}" + ) + + if(HPX_WITH_CXX17_HARDWARE_DESTRUCTIVE_INTERFERENCE_SIZE) + set(compile_definitions + "-DHPX_HAVE_CXX17_HARDWARE_DESTRUCTIVE_INTERFERENCE_SIZE" + ) + endif() + + try_run( + run_result_unused compile_result_unused "${PROJECT_BINARY_DIR}" SOURCES + "${PROJECT_BINARY_DIR}/cache_line_size.cpp" + COMPILE_DEFINITIONS ${compile_definitions} + CMAKE_FLAGS CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS FALSE + RUN_OUTPUT_VARIABLE CACHE_LINE_SIZE + ) + + if(NOT CACHE_LINE_SIZE MATCHES "^[1-9][0-9]*$") + set(CACHE_LINE_SIZE "64") + endif() + set(HPX_INTERNAL_CACHE_LINE_SIZE_DETECT + ${CACHE_LINE_SIZE} + CACHE INTERNAL "" + ) + else() + set(CACHE_LINE_SIZE ${HPX_INTERNAL_CACHE_LINE_SIZE_DETECT}) + endif() + + set(${output_var} + "${CACHE_LINE_SIZE}" + PARENT_SCOPE + ) +endfunction() diff --git a/cmake/HPX_AddModule.cmake b/cmake/HPX_AddModule.cmake index 22510e5117be..850d5c83d05f 100644 --- a/cmake/HPX_AddModule.cmake +++ b/cmake/HPX_AddModule.cmake @@ -210,6 +210,7 @@ function(add_hpx_module libname modulename) "${global_config_file}" @ONLY ) set(generated_headers ${generated_headers} ${global_config_file}) + # Global config defines file (different from the one for each module)
set(global_config_file ${CMAKE_CURRENT_BINARY_DIR}/include/hpx/config/defines.hpp @@ -221,6 +222,17 @@ function(add_hpx_module libname modulename) FILENAME "${global_config_file}" ) set(generated_headers ${generated_headers} ${global_config_file}) + + # Cacheline size definition + set(cache_line_size_file + ${CMAKE_CURRENT_BINARY_DIR}/include/hpx/config/cache_line_size.hpp + ) + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/templates/cache_line_size.hpp.in" + "${cache_line_size_file}" @ONLY + ) + set(generated_headers ${generated_headers} ${cache_line_size_file}) + endif() # collect zombie generated headers diff --git a/libs/core/concurrency/include/hpx/concurrency/cache_line_data.hpp b/libs/core/concurrency/include/hpx/concurrency/cache_line_data.hpp index 8df03a0ffec8..21033fbee881 100644 --- a/libs/core/concurrency/include/hpx/concurrency/cache_line_data.hpp +++ b/libs/core/concurrency/include/hpx/concurrency/cache_line_data.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022 Hartmut Kaiser +// Copyright (c) 2019-2024 Hartmut Kaiser // Copyright (c) 2019 Thomas Heller // // SPDX-License-Identifier: BSL-1.0 @@ -11,192 +11,163 @@ #include #include -#include #include #include -namespace hpx { +namespace hpx::util { - namespace threads { + namespace detail { - //////////////////////////////////////////////////////////////////////// - // abstract away cache-line size - constexpr std::size_t get_cache_line_size() noexcept + // Computes the padding required to fill up a full cache line after + // data_size bytes. 
+ constexpr std::size_t get_cache_line_padding_size( + std::size_t data_size) noexcept { -#if defined(HPX_HAVE_CXX17_HARDWARE_DESTRUCTIVE_INTERFERENCE_SIZE) - return std::hardware_destructive_interference_size; -#else -#if defined(__s390__) || defined(__s390x__) - return 256; // assume 256 byte cache-line size -#elif defined(powerpc) || defined(__powerpc__) || defined(__ppc__) - return 128; // assume 128 byte cache-line size -#else - return 64; // assume 64 byte cache-line size -#endif -#endif + return (threads::get_cache_line_size() - + (data_size % threads::get_cache_line_size())) % + threads::get_cache_line_size(); } - } // namespace threads - - namespace util { - - namespace detail { - - // Computes the padding required to fill up a full cache line after - // data_size bytes. - constexpr std::size_t get_cache_line_padding_size( - std::size_t data_size) noexcept - { - return (threads::get_cache_line_size() - - (data_size % threads::get_cache_line_size())) % - threads::get_cache_line_size(); - } - - template - struct needs_padding - : std::integral_constant - { - }; - } // namespace detail - - // Variable 'cacheline_pad' is uninitialized. Always initialize a member - // variable + + template + struct needs_padding + : std::integral_constant + { + }; + } // namespace detail + + // Variable 'cacheline_pad' is uninitialized. Always initialize a member + // variable #if defined(HPX_MSVC) #pragma warning(push) #pragma warning(disable : 26495) #endif - // NOTE: We do not use alignas here because asking for overaligned - // memory is significantly more expensive than asking for unaligned - // memory. Padding the struct is cheaper and enough for internal - // purposes. - - // NOTE: The implementations below are currently identical because of - // the above issue. Both names are kept for compatibility. + // NOTE: We do not use alignas here because asking for overaligned + // memory is significantly more expensive than asking for unaligned + // memory. 
Padding the struct is cheaper and enough for internal + // purposes. + + // NOTE: The implementations below are currently identical because of + // the above issue. Both names are kept for compatibility. + + /////////////////////////////////////////////////////////////////////////// + // special struct to ensure cache line alignment of a data type + template ::value> + struct cache_aligned_data + { + // We have an explicit (default) constructor here to avoid for the + // entire cache-line to be initialized by the compiler. + + constexpr cache_aligned_data() noexcept( //-V730 + std::is_nothrow_default_constructible_v) + : data_() + { + } - /////////////////////////////////////////////////////////////////////////// - // special struct to ensure cache line alignment of a data type - template ::value> - struct cache_aligned_data + template >> + cache_aligned_data(Ts&&... ts) noexcept( + std::is_nothrow_constructible_v) + : data_(HPX_FORWARD(Ts, ts)...) { - // We have an explicit (default) constructor here to avoid for the - // entire cache-line to be initialized by the compiler. - - constexpr cache_aligned_data() noexcept( //-V730 - std::is_nothrow_default_constructible_v) - : data_() - { - } - - template >> - cache_aligned_data(Ts&&... ts) noexcept( - std::is_nothrow_constructible_v) - : data_(HPX_FORWARD(Ts, ts)...) 
- { - } - - // pad to cache line size bytes - Data data_; - - // cppcheck-suppress unusedVariable - char cacheline_pad[detail::get_cache_line_padding_size( - // NOLINTNEXTLINE(bugprone-sizeof-expression) - sizeof(Data))]; - }; + } - template - struct cache_aligned_data + // pad to cache line size bytes + Data data_; + + // cppcheck-suppress unusedVariable + char cacheline_pad[detail::get_cache_line_padding_size( + // NOLINTNEXTLINE(bugprone-sizeof-expression) + sizeof(Data))]; + }; + + template + struct cache_aligned_data + { + constexpr cache_aligned_data() noexcept( + std::is_nothrow_default_constructible_v) + : data_() { - constexpr cache_aligned_data() noexcept( - std::is_nothrow_default_constructible_v) - : data_() - { - } - - template >> - cache_aligned_data(Ts&&... ts) noexcept( - std::is_nothrow_constructible_v) - : data_(HPX_FORWARD(Ts, ts)...) - { - } - - // no need to pad to cache line size - Data data_; - }; + } - /////////////////////////////////////////////////////////////////////////// - // special struct to ensure cache line alignment of a data type - template ::value> - struct cache_aligned_data_derived : Data + template >> + cache_aligned_data(Ts&&... ts) noexcept( + std::is_nothrow_constructible_v) + : data_(HPX_FORWARD(Ts, ts)...) { - // We have an explicit (default) constructor here to avoid for the - // entire cache-line to be initialized by the compiler. - constexpr cache_aligned_data_derived() noexcept( //-V730 - std::is_nothrow_default_constructible_v) - : Data() - { - } - - template >> - cache_aligned_data_derived(Ts&&... ts) noexcept( - std::is_nothrow_constructible_v) - : Data(HPX_FORWARD(Ts, ts)...) 
- { - } - - // cppcheck-suppress unusedVariable - char cacheline_pad[detail::get_cache_line_padding_size( - // NOLINTNEXTLINE(bugprone-sizeof-expression) - sizeof(Data))]; - }; + } - template - struct cache_aligned_data_derived : Data + // no need to pad to cache line size + Data data_; + }; + + /////////////////////////////////////////////////////////////////////////// + // special struct to ensure cache line alignment of a data type + template ::value> + struct cache_aligned_data_derived : Data + { + // We have an explicit (default) constructor here to avoid for the + // entire cache-line to be initialized by the compiler. + constexpr cache_aligned_data_derived() noexcept( //-V730 + std::is_nothrow_default_constructible_v) + : Data() { - constexpr cache_aligned_data_derived() noexcept( - std::is_nothrow_default_constructible_v) - : Data() - { - } - - template >> - cache_aligned_data_derived(Ts&&... ts) noexcept( - std::is_nothrow_constructible_v) - : Data(HPX_FORWARD(Ts, ts)...) - { - } - - // no need to pad to cache line size - }; + } -#if defined(HPX_MSVC) -#pragma warning(pop) -#endif + template >> + cache_aligned_data_derived(Ts&&... ts) noexcept( + std::is_nothrow_constructible_v) + : Data(HPX_FORWARD(Ts, ts)...) 
+ { + } - /////////////////////////////////////////////////////////////////////////// - // special struct to data type is cache line aligned and fully occupies a - // cache line - template - using cache_line_data = cache_aligned_data; + // cppcheck-suppress unusedVariable + char cacheline_pad[detail::get_cache_line_padding_size( + // NOLINTNEXTLINE(bugprone-sizeof-expression) + sizeof(Data))]; + }; + + template + struct cache_aligned_data_derived : Data + { + constexpr cache_aligned_data_derived() noexcept( + std::is_nothrow_default_constructible_v) + : Data() + { + } - /////////////////////////////////////////////////////////////////////////// - template - constexpr auto align_up(T value, std::size_t alignment) noexcept + template >> + cache_aligned_data_derived(Ts&&... ts) noexcept( + std::is_nothrow_constructible_v) + : Data(HPX_FORWARD(Ts, ts)...) { - return T(hpx::bit_cast(value + (alignment - 1)) & - ~(alignment - 1)); } - } // namespace util -} // namespace hpx + // no need to pad to cache line size + }; + +#if defined(HPX_MSVC) +#pragma warning(pop) +#endif + + /////////////////////////////////////////////////////////////////////////// + // special struct to data type is cache line aligned and fully occupies a + // cache line + template + using cache_line_data = cache_aligned_data; + + /////////////////////////////////////////////////////////////////////////// + template + constexpr auto align_up(T value, std::size_t alignment) noexcept + { + return T(hpx::bit_cast(value + (alignment - 1)) & + ~(alignment - 1)); + } +} // namespace hpx::util diff --git a/libs/core/config/cmake/templates/cache_line_size.hpp.in b/libs/core/config/cmake/templates/cache_line_size.hpp.in new file mode 100644 index 000000000000..1d69475cc432 --- /dev/null +++ b/libs/core/config/cmake/templates/cache_line_size.hpp.in @@ -0,0 +1,17 @@ +// Copyright (c) 2024 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include <cstddef> + +namespace hpx::threads { + + constexpr std::size_t get_cache_line_size() noexcept + { + return @HPX_INTERNAL_CACHE_LINE_SIZE_DETECT@; + } +} // namespace hpx::threads diff --git a/libs/core/config/include/hpx/config.hpp b/libs/core/config/include/hpx/config.hpp index c3b3425d4bad..2375a0bb04d6 100644 --- a/libs/core/config/include/hpx/config.hpp +++ b/libs/core/config/include/hpx/config.hpp @@ -14,6 +14,7 @@ #include #include #include +#include <hpx/config/cache_line_size.hpp> #include #include #include