Skip to content

Commit

Permalink
addcuda2
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Sep 5, 2024
1 parent ebaa3de commit 8c890ba
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 21 deletions.
5 changes: 4 additions & 1 deletion slides/moderncuda/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# 现代 C++ 的 CUDA 编程

参考资料:https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
参考资料:

- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
- https://www.cs.sfu.ca/~ashriram/Courses/CS431/assets/lectures/Part8/GPU1.pdf

## 配置 CUDA 开发环境

Expand Down
52 changes: 40 additions & 12 deletions slides/moderncuda/better_cuda.cuh → slides/moderncuda/cudapp.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <cuda_runtime.h>
#include <memory>
#include <new>
Expand All @@ -11,7 +12,7 @@
#include <utility>
#include <vector>

namespace cupp {
namespace cudapp {

std::error_category const &cudaErrorCategory() noexcept {
static struct : std::error_category {
Expand Down Expand Up @@ -40,7 +41,7 @@ void throwCudaError(cudaError_t err, char const *file, int line) {
do { \
cudaError_t err = (expr); \
if (err != cudaSuccess) [[unlikely]] { \
::cupp::throwCudaError(err, __FILE__, __LINE__); \
::cudapp::throwCudaError(err, __FILE__, __LINE__); \
} \
} while (0)

Expand Down Expand Up @@ -265,10 +266,22 @@ public:
}
};

void synchronize() const {
void join() const {
CHECK_CUDA(cudaEventSynchronize(*this));
}

// Non-blocking poll of this event's status.
// Returns true once the event has been recorded and reached, false while
// work before it is still pending; any other query result is raised as a
// CUDA error via CHECK_CUDA.
bool joinReady() const {
    cudaError_t status = cudaEventQuery(*this);
    switch (status) {
    case cudaSuccess:
        return true;
    case cudaErrorNotReady:
        return false;
    default:
        CHECK_CUDA(status);  // throws; the return below is unreachable
        return false;
    }
}

float elapsedMillis(CudaEvent const &event) const {
float result;
CHECK_CUDA(cudaEventElapsedTime(&result, *this, event));
Expand Down Expand Up @@ -315,10 +328,6 @@ public:
return CudaStream(nullptr);
}

void synchronize() const {
CHECK_CUDA(cudaStreamSynchronize(*this));
}

// Asynchronously copies `size` bytes from `src` to `dst` on this stream.
// `kind` selects the direction (host-to-device, device-to-host, ...).
// `src` is taken as const: cudaMemcpyAsync never writes through it, and
// the const qualifier lets callers pass read-only buffers. Existing
// callers passing `void *` still compile (non-const converts to const).
void copy(void *dst, const void *src, size_t size, cudaMemcpyKind kind) const {
    CHECK_CUDA(cudaMemcpyAsync(dst, src, size, kind, *this));
}
Expand Down Expand Up @@ -348,23 +357,27 @@ public:
CHECK_CUDA(cudaStreamWaitEvent(*this, event, flags));
}

void asyncWait(cudaStreamCallback_t callback, void *userData) const {
// Blocks the calling host thread until all work previously queued on
// this stream has completed (wraps cudaStreamSynchronize).
void join() const {
CHECK_CUDA(cudaStreamSynchronize(*this));
}

// Registers a host callback that runs once all work previously queued
// on this stream completes (wraps cudaStreamAddCallback; flags must be 0).
// NOTE(review): cudaStreamAddCallback is deprecated in recent CUDA in
// favor of cudaLaunchHostFunc — confirm the minimum toolkit targeted.
void joinAsync(cudaStreamCallback_t callback, void *userData) const {
CHECK_CUDA(cudaStreamAddCallback(*this, callback, userData, 0));
}

// Registers an arbitrary callable to run on the host once all work
// previously queued on this stream completes. The callable is invoked
// as func(stream, status).
//
// Bug fix: the previous version called std::make_unique<Func>() with no
// arguments, default-constructing the callable and discarding the one the
// caller passed in (including any captured state); lambdas with captures
// would not even compile. The callable is now moved/copied onto the heap
// so it outlives this scope; ownership transfers to the trampoline, which
// deletes it after invocation. std::decay_t strips the reference that
// forwarding-reference deduction can add to Func.
template <class Func>
void joinAsync(Func &&func) const {
    using Fn = std::decay_t<Func>;
    auto userData = std::make_unique<Fn>(std::forward<Func>(func));
    cudaStreamCallback_t callback = [](cudaStream_t stream,
                                       cudaError_t status, void *data) {
        // Reclaim ownership; unique_ptr frees the callable after the call.
        std::unique_ptr<Fn> fn(static_cast<Fn *>(data));
        (*fn)(stream, status);
    };
    joinAsync(callback, userData.get());
    userData.release();  // trampoline now owns the callable
}

bool pollWait() {
bool joinReady() const {
cudaError_t res = cudaStreamQuery(*this);
if (res == cudaSuccess) {
return true;
Expand Down Expand Up @@ -418,7 +431,7 @@ struct CudaAllocator : private Arena {
if (res == cudaErrorMemoryAllocation) [[unlikely]] {
throw std::bad_alloc();
}
CHECK_CUDA(("Arena::doMalloc", res));
CHECK_CUDA(res /* Arena::doMalloc */);
return static_cast<T *>(ptr);
}

Expand Down Expand Up @@ -459,6 +472,21 @@ struct CudaAllocator : private Arena {
template <class T>
using CudaVector = std::vector<T, CudaAllocator<T>>;

// Workaround for clang-as-CUDA-compiler with libstdc++: provide a
// __host__ __device__ printf shim so the same call compiles in both
// host and device code under this toolchain combination.
#if defined(__clang__) && defined(__CUDACC__) && defined(__GLIBCXX__)
__host__ __device__ static void printf(const char *fmt, ...) {
va_list args;
va_start(args, fmt);
#if __CUDA_ARCH__
// Device path: forwards the packed varargs buffer to device-side vprintf.
// NOTE(review): the (const char *) cast of a va_list assumes a
// pointer/array-like va_list representation on this ABI — verify this
// against the device vprintf signature for the targeted toolchain.
::vprintf(fmt, (const char *)args);
#else
// Host path: plain C vprintf with the usual va_list.
::vprintf(fmt, args);
#endif
va_end(args);
}
#else
// Other toolchains: the ordinary ::printf already works in both host and
// device code; just pull it into this namespace.
using ::printf;
#endif

// #if __cpp_lib_memory_resource
// template <class Arena>
// struct CudaResource : std::pmr::memory_resource, private Arena {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
}

__device__ void device_func() {
auto t = cooperative_groups::this_thread();
t.size();
std::sin(1);
}

__host__ __device__ void host_device_func() {
Expand Down
File renamed without changes.
File renamed without changes.
33 changes: 33 additions & 0 deletions slides/moderncuda/main.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include <cuda_runtime.h>
#include <nvfunctional>
#include "cudapp.cuh"

using namespace cudapp;

// Demo kernel: every launched thread prints the scalar argument and its
// own (blockIdx.x, threadIdx.x) coordinates. extern "C" keeps the symbol
// unmangled so the kernel can be referenced by address from the host
// launch APIs below.
extern "C" __global__ void kernel(int x) {
printf("内核参数 x = %d\n", x);  // "kernel argument x = %d"
printf("线程编号 (%d, %d)\n", blockIdx.x, threadIdx.x);  // "thread id (block, thread)"
}

// Demonstrates three equivalent ways to launch the same kernel with a
// grid of 3 blocks and 4 threads per block.
int main() {
    int x = 42;

    // Launch 1: triple-chevron syntax (grid = 3, block = 4).
    kernel<<<3, 4, 0, 0>>>(x);
    // <<<>>> returns no status; launch-configuration errors surface here.
    CHECK_CUDA(cudaGetLastError());

    // Launch 2: cudaLaunchKernel with an argument-pointer array.
    void *args[] = {&x};
    CHECK_CUDA(cudaLaunchKernel((const void *)kernel, dim3(3), dim3(4), args, 0, 0));

    // Launch 3: extended launch API with typed arguments.
    // Bug fix: gridDim/blockDim were swapped (blockDim=3, gridDim=4),
    // inconsistent with the two launches above; all three now use the
    // same 3-block x 4-thread configuration.
    cudaLaunchConfig_t cfg{};
    cfg.gridDim = dim3(3);
    cfg.blockDim = dim3(4);
    cfg.dynamicSmemBytes = 0;
    cfg.stream = 0;
    cfg.attrs = nullptr;
    cfg.numAttrs = 0;
    CHECK_CUDA(cudaLaunchKernelEx(&cfg, kernel, x));

    const char *name;
    CHECK_CUDA(cudaFuncGetName(&name, (const void *)kernel));

    // Block until the default stream drains so the device printf output
    // is flushed before the process exits.
    CudaStream::nullStream().join();
    return 0;
}
7 changes: 1 addition & 6 deletions slides/moderncuda/tinybench.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
#define TINYBENCH_IMPL_MAIN
#define TINYBENCH_IMPL
#include "tinybench.hpp"

[[gnu::weak]] int main() {
std::unique_ptr<tinybench::Reporter> rep(tinybench::makeMultipleReporter({tinybench::makeConsoleReporter()}));
rep->run_all();
return 0;
}
10 changes: 10 additions & 0 deletions slides/moderncuda/tinybench.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -885,3 +885,13 @@ Reporter *makeMultipleReporter(std::vector<Reporter *> const &reporters) {

}
#endif

// Optional default entry point: define TINYBENCH_IMPL_MAIN in exactly one
// translation unit to get a main() that runs every registered benchmark
// through a console reporter. [[gnu::weak]] lets a user-provided main()
// take precedence at link time.
#ifdef TINYBENCH_IMPL_MAIN
#include <memory>

[[gnu::weak]] int main() {
    auto console = tinybench::makeConsoleReporter();
    std::unique_ptr<tinybench::Reporter> reporter(
        tinybench::makeMultipleReporter({console}));
    reporter->run_all();
    return 0;
}
#endif

0 comments on commit 8c890ba

Please sign in to comment.