Skip to content

Commit

Permalink
Merge #1730 Add custom thrust/cub namespace when it is supported
Browse files Browse the repository at this point in the history
This PR adds custom thrust namespace for hip and cuda when it is supported to reduce the potential conflict by thrust itself.
Hip enables it after 5.7 and cuda enable it after 11.6

Related PR: #1730
  • Loading branch information
yhmtsai authored Nov 26, 2024
2 parents 8fa7bc5 + b85680c commit 6f3cb5b
Show file tree
Hide file tree
Showing 24 changed files with 212 additions and 76 deletions.
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,25 @@ gko_rename_cache(GINKGO_CUDA_COMPILER_FLAGS CMAKE_CUDA_FLAGS BOOL "Flags used by
# load executor-specific configuration
if(GINKGO_BUILD_CUDA)
include(cmake/cuda.cmake)
if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE AND CUDAToolkit_VERSION VERSION_LESS 11.6)
message(STATUS "Disable custom thrust namespace for cuda before 11.6 because it has no effect in the thrust shipped by cuda before 11.6")
set(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE OFF)
else()
message(STATUS "Enable custom thrust namespace for cuda")
set(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE ON)
endif()
endif()
if(GINKGO_BUILD_HIP)
include(cmake/hip.cmake)
if(GINKGO_HIP_PLATFORM_AMD AND GINKGO_HIP_VERSION VERSION_LESS 5.7)
# Hip allow custom namespace but does not fully make everything in the custom namespace before rocm-5.7
# more specific pr: https://github.com/ROCm/rocThrust/pull/286
message(STATUS "Disable custom thrust namespace for hip before 5.7 because hip does not fully support it before 5.7")
set(GINKGO_HIP_CUSTOM_THRUST_NAMESPACE OFF)
else()
message(STATUS "Enable custom thrust namespace for hip")
set(GINKGO_HIP_CUSTOM_THRUST_NAMESPACE ON)
endif()
endif()
if(GINKGO_BUILD_SYCL)
include(cmake/sycl.cmake)
Expand Down
6 changes: 6 additions & 0 deletions cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ endfunction(ginkgo_create_cuda_test)
function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
add_executable(${test_target_name} ${filename})
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA GKO_DEVICE_NAMESPACE=cuda)
if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
target_compile_definitions(${test_target_name} PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
endif()
if(MSVC)
target_compile_options(${test_target_name}
PRIVATE
Expand Down Expand Up @@ -187,6 +190,9 @@ function(ginkgo_create_hip_test_internal test_name filename test_target_name)
set_source_files_properties(${filename} PROPERTIES LANGUAGE HIP)
add_executable(${test_target_name} ${filename})
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_HIP GKO_DEVICE_NAMESPACE=hip)
if(GINKGO_HIP_CUSTOM_THRUST_NAMESPACE)
target_compile_definitions(${test_target_name} PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
endif()
ginkgo_set_test_target_properties(${test_target_name} "_hip" ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE hipgpu)
endfunction(ginkgo_create_hip_test_internal)
Expand Down
1 change: 1 addition & 0 deletions common/cuda_hip/factorization/factorization_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <ginkgo/core/base/array.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
Expand Down
1 change: 1 addition & 0 deletions common/cuda_hip/reorder/rcm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <ginkgo/core/matrix/permutation.hpp>
#include <ginkgo/core/matrix/sparsity_csr.hpp>

#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/components/memory.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
Expand Down
1 change: 1 addition & 0 deletions common/cuda_hip/solver/multigrid_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>

#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
Expand Down
2 changes: 2 additions & 0 deletions common/unified/base/kernel_launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#if defined(GKO_COMPILING_CUDA)

#define GKO_KERNEL __device__
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/types.hpp"


Expand All @@ -43,6 +44,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr unpack_member_type<T> unpack_member(T value)
#elif defined(GKO_COMPILING_HIP)

#define GKO_KERNEL __device__
#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/types.hpp"


Expand Down
11 changes: 9 additions & 2 deletions cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ endif()

ginkgo_compile_features(ginkgo_cuda)
target_compile_definitions(ginkgo_cuda PRIVATE GKO_COMPILING_CUDA GKO_DEVICE_NAMESPACE=cuda)
if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
target_compile_definitions(ginkgo_cuda PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
endif()

# include path for generated headers like jacobi_common.hpp
target_include_directories(ginkgo_cuda
Expand All @@ -83,8 +86,12 @@ target_link_libraries(ginkgo_cuda PUBLIC ginkgo_device ${CMAKE_DL_LIBS})
ginkgo_default_includes(ginkgo_cuda)
ginkgo_install_library(ginkgo_cuda)

if (GINKGO_CHECK_CIRCULAR_DEPS)
ginkgo_check_headers(ginkgo_cuda "GKO_COMPILING_CUDA;GKO_DEVICE_NAMESPACE=cuda")
if(GINKGO_CHECK_CIRCULAR_DEPS)
set(check_header_def "GKO_COMPILING_CUDA;GKO_DEVICE_NAMESPACE=cuda")
if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
set(check_header_def "${check_header_def};THRUST_CUB_WRAPPED_NAMESPACE=gko")
endif()
ginkgo_check_headers(ginkgo_cuda "${check_header_def}")
endif()

if(GINKGO_BUILD_TESTS)
Expand Down
3 changes: 1 addition & 2 deletions cuda/base/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
#define GKO_CUDA_BASE_CONFIG_HPP_


#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>

#include "common/cuda_hip/base/math.hpp"


namespace gko {
namespace kernels {
Expand Down
18 changes: 0 additions & 18 deletions cuda/base/cublas_bindings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

#include <ginkgo/core/base/exception_helpers.hpp>

#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/types.hpp"


Expand Down Expand Up @@ -229,23 +228,6 @@ GKO_BIND_CUBLAS_NORM2(ValueType, detail::not_implemented);
#undef GKO_BIND_CUBLAS_NORM2


inline cublasHandle_t init(cudaStream_t stream)
{
cublasHandle_t handle;
GKO_ASSERT_NO_CUBLAS_ERRORS(cublasCreate(&handle));
GKO_ASSERT_NO_CUBLAS_ERRORS(
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE));
GKO_ASSERT_NO_CUBLAS_ERRORS(cublasSetStream(handle, stream));
return handle;
}


inline void destroy(cublasHandle_t handle)
{
GKO_ASSERT_NO_CUBLAS_ERRORS(cublasDestroy(handle));
}


} // namespace cublas


Expand Down
43 changes: 43 additions & 0 deletions cuda/base/cublas_handle.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_CUDA_BASE_CUBLAS_HANDLE_HPP_
#define GKO_CUDA_BASE_CUBLAS_HANDLE_HPP_


#include <cublas_v2.h>

#include <ginkgo/core/base/exception_helpers.hpp>


namespace gko {
namespace kernels {
namespace cuda {
namespace cublas {


inline cublasHandle_t init(cudaStream_t stream)
{
cublasHandle_t handle;
GKO_ASSERT_NO_CUBLAS_ERRORS(cublasCreate(&handle));
GKO_ASSERT_NO_CUBLAS_ERRORS(
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE));
GKO_ASSERT_NO_CUBLAS_ERRORS(cublasSetStream(handle, stream));
return handle;
}


inline void destroy(cublasHandle_t handle)
{
GKO_ASSERT_NO_CUBLAS_ERRORS(cublasDestroy(handle));
}


} // namespace cublas
} // namespace cuda
} // namespace kernels
} // namespace gko


#endif // GKO_CUDA_BASE_CUBLAS_HANDLE_HPP_
7 changes: 3 additions & 4 deletions cuda/base/executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <thread>

#include <cuda_runtime.h>
#include <cublas_v2.h>

#include <ginkgo/config.hpp>
#include <ginkgo/core/base/device.hpp>
Expand All @@ -17,7 +18,8 @@
#include <ginkgo/core/base/memory.hpp>

#include "common/cuda_hip/base/config.hpp"
#include "cuda/base/cublas_bindings.hpp"
#include "common/cuda_hip/base/executor.hpp.inc"
#include "cuda/base/cublas_handle.hpp"
#include "cuda/base/cusparse_handle.hpp"
#include "cuda/base/device.hpp"
#include "cuda/base/scoped_device_id.hpp"
Expand All @@ -26,9 +28,6 @@
namespace gko {


#include "common/cuda_hip/base/executor.hpp.inc"


std::unique_ptr<CudaAllocatorBase> cuda_allocator_from_mode(
int device_id, allocation_mode mode)
{
Expand Down
1 change: 1 addition & 0 deletions cuda/get_info.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ginkgo_print_module_header(${detailed_log} "CUDA")
ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_ARCHITECTURES")
ginkgo_print_variable(${detailed_log} "GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE")
ginkgo_print_module_footer(${detailed_log} "CUDA variables:")
ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER")
ginkgo_print_variable(${detailed_log} "CMAKE_CUDA_COMPILER_VERSION")
Expand Down
5 changes: 3 additions & 2 deletions cuda/test/base/math.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#include "cuda/test/utils.hpp"


namespace {
// put the test in gko namespace to easily adapt the thrust/cub in gko or not
namespace gko {
namespace kernel {


Expand Down Expand Up @@ -120,4 +121,4 @@ TEST_F(IsFinite, DoubleComplex)
}
} // namespace
} // namespace gko
3 changes: 2 additions & 1 deletion cuda/test/components/cooperative_groups.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ __global__ void cg_shuffle(bool* s)
group::tiled_partition<config::warp_size>(group::this_thread_block());
auto i = int(group.thread_rank());
test_assert(s, group.shfl_up(i, 1) == max(0, i - 1));
test_assert(s, group.shfl_down(i, 1) == min(i + 1, config::warp_size - 1));
test_assert(s, group.shfl_down(i, 1) ==
min(i + 1, static_cast<int>(config::warp_size) - 1));
test_assert(s, group.shfl(i, 0) == 0);
}
Expand Down
11 changes: 9 additions & 2 deletions hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ target_include_directories(ginkgo_hip
${CMAKE_CURRENT_BINARY_DIR} # for generated headers like jacobi_common.hip.hpp
)
target_compile_definitions(ginkgo_hip PRIVATE GKO_COMPILING_HIP GKO_DEVICE_NAMESPACE=hip)
if(GINKGO_HIP_CUSTOM_THRUST_NAMESPACE)
target_compile_definitions(ginkgo_hip PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
endif()

target_link_libraries(ginkgo_hip PUBLIC ginkgo_device)
target_link_libraries(ginkgo_hip PRIVATE hip::host roc::hipblas roc::hipsparse hip::hiprand roc::rocrand roc::rocthrust)
Expand All @@ -80,8 +83,12 @@ ginkgo_compile_features(ginkgo_hip)
ginkgo_default_includes(ginkgo_hip)
ginkgo_install_library(ginkgo_hip)

if (GINKGO_CHECK_CIRCULAR_DEPS)
ginkgo_check_headers(ginkgo_hip "GKO_COMPILING_HIP;GKO_DEVICE_NAMESPACE=hip")
if(GINKGO_CHECK_CIRCULAR_DEPS)
set(check_header_def "GKO_COMPILING_HIP;GKO_DEVICE_NAMESPACE=hip")
if(GINKGO_HIP_CUSTOM_THRUST_NAMESPACE)
set(check_header_def "${check_header_def};THRUST_CUB_WRAPPED_NAMESPACE=gko")
endif()
ginkgo_check_headers(ginkgo_hip "${check_header_def}")
endif()

if(GINKGO_BUILD_TESTS)
Expand Down
2 changes: 1 addition & 1 deletion hip/base/config.hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#define GKO_HIP_BASE_CONFIG_HIP_HPP_


#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/types.hpp>

#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/runtime.hpp"


Expand Down
4 changes: 2 additions & 2 deletions hip/base/executor.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "hip/base/device.hpp"
#include "hip/base/hipblas_bindings.hip.hpp"
#include "hip/base/hipsparse_bindings.hip.hpp"
#include "hip/base/hipblas_handle.hpp"
#include "hip/base/hipsparse_handle.hpp"
#include "hip/base/scoped_device_id.hip.hpp"


Expand Down
18 changes: 0 additions & 18 deletions hip/base/hipblas_bindings.hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,24 +240,6 @@ GKO_BIND_HIPBLAS_NORM2(ValueType, detail::not_implemented);
#undef GKO_BIND_HIPBLAS_NORM2


inline hipblasContext* init(hipStream_t stream)
{
hipblasHandle_t handle;
GKO_ASSERT_NO_HIPBLAS_ERRORS(hipblasCreate(&handle));
GKO_ASSERT_NO_HIPBLAS_ERRORS(
hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE));
GKO_ASSERT_NO_HIPBLAS_ERRORS(hipblasSetStream(handle, stream));
return reinterpret_cast<hipblasContext*>(handle);
}


inline void destroy_hipblas_handle(hipblasContext* handle)
{
GKO_ASSERT_NO_HIPBLAS_ERRORS(
hipblasDestroy(reinterpret_cast<hipblasHandle_t>(handle)));
}


} // namespace hipblas


Expand Down
48 changes: 48 additions & 0 deletions hip/base/hipblas_handle.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_HIP_BASE_HIPBLAS_HANDLE_HPP_
#define GKO_HIP_BASE_HIPBLAS_HANDLE_HPP_


#if HIP_VERSION >= 50200000
#include <hipblas/hipblas.h>
#else
#include <hipblas.h>
#endif

#include <ginkgo/core/base/exception_helpers.hpp>


namespace gko {
namespace kernels {
namespace hip {
namespace hipblas {


inline hipblasContext* init(hipStream_t stream)
{
hipblasHandle_t handle;
GKO_ASSERT_NO_HIPBLAS_ERRORS(hipblasCreate(&handle));
GKO_ASSERT_NO_HIPBLAS_ERRORS(
hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE));
GKO_ASSERT_NO_HIPBLAS_ERRORS(hipblasSetStream(handle, stream));
return reinterpret_cast<hipblasContext*>(handle);
}


inline void destroy_hipblas_handle(hipblasContext* handle)
{
GKO_ASSERT_NO_HIPBLAS_ERRORS(
hipblasDestroy(reinterpret_cast<hipblasHandle_t>(handle)));
}


} // namespace hipblas
} // namespace hip
} // namespace kernels
} // namespace gko


#endif // GKO_HIP_BASE_HIPBLAS_HANDLE_HPP_
Loading

0 comments on commit 6f3cb5b

Please sign in to comment.